Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/vfp/vfpdouble.c | |
3 | * | |
4 | * This code is derived in part from John R. Housers softfloat library, which | |
5 | * carries the following notice: | |
6 | * | |
7 | * =========================================================================== | |
8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | |
9 | * Arithmetic Package, Release 2. | |
10 | * | |
11 | * Written by John R. Hauser. This work was made possible in part by the | |
12 | * International Computer Science Institute, located at Suite 600, 1947 Center | |
13 | * Street, Berkeley, California 94704. Funding was partially provided by the | |
14 | * National Science Foundation under grant MIP-9311980. The original version | |
15 | * of this code was written as part of a project to build a fixed-point vector | |
16 | * processor in collaboration with the University of California at Berkeley, | |
17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | |
18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | |
19 | * arithmetic/softfloat.html'. | |
20 | * | |
21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | |
22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | |
23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | |
24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | |
25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | |
26 | * | |
27 | * Derivative works are acceptable, even for commercial purposes, so long as | |
28 | * (1) they include prominent notice that the work is derivative, and (2) they | |
29 | * include prominent notice akin to these three paragraphs for those parts of | |
30 | * this code that are retained. | |
31 | * =========================================================================== | |
32 | */ | |
33 | #include <linux/kernel.h> | |
34 | #include <linux/bitops.h> | |
438a7616 RK |
35 | |
36 | #include <asm/div64.h> | |
1da177e4 LT |
37 | #include <asm/ptrace.h> |
38 | #include <asm/vfp.h> | |
39 | ||
40 | #include "vfpinstr.h" | |
41 | #include "vfp.h" | |
42 | ||
43 | static struct vfp_double vfp_double_default_qnan = { | |
44 | .exponent = 2047, | |
45 | .sign = 0, | |
46 | .significand = VFP_DOUBLE_SIGNIFICAND_QNAN, | |
47 | }; | |
48 | ||
49 | static void vfp_double_dump(const char *str, struct vfp_double *d) | |
50 | { | |
51 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", | |
52 | str, d->sign != 0, d->exponent, d->significand); | |
53 | } | |
54 | ||
55 | static void vfp_double_normalise_denormal(struct vfp_double *vd) | |
56 | { | |
57 | int bits = 31 - fls(vd->significand >> 32); | |
58 | if (bits == 31) | |
59 | bits = 62 - fls(vd->significand); | |
60 | ||
61 | vfp_double_dump("normalise_denormal: in", vd); | |
62 | ||
63 | if (bits) { | |
64 | vd->exponent -= bits - 1; | |
65 | vd->significand <<= bits; | |
66 | } | |
67 | ||
68 | vfp_double_dump("normalise_denormal: out", vd); | |
69 | } | |
70 | ||
71 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | |
72 | { | |
73 | u64 significand, incr; | |
74 | int exponent, shift, underflow; | |
75 | u32 rmode; | |
76 | ||
77 | vfp_double_dump("pack: in", vd); | |
78 | ||
79 | /* | |
80 | * Infinities and NaNs are a special case. | |
81 | */ | |
82 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) | |
83 | goto pack; | |
84 | ||
85 | /* | |
86 | * Special-case zero. | |
87 | */ | |
88 | if (vd->significand == 0) { | |
89 | vd->exponent = 0; | |
90 | goto pack; | |
91 | } | |
92 | ||
93 | exponent = vd->exponent; | |
94 | significand = vd->significand; | |
95 | ||
96 | shift = 32 - fls(significand >> 32); | |
97 | if (shift == 32) | |
98 | shift = 64 - fls(significand); | |
99 | if (shift) { | |
100 | exponent -= shift; | |
101 | significand <<= shift; | |
102 | } | |
103 | ||
104 | #ifdef DEBUG | |
105 | vd->exponent = exponent; | |
106 | vd->significand = significand; | |
107 | vfp_double_dump("pack: normalised", vd); | |
108 | #endif | |
109 | ||
110 | /* | |
111 | * Tiny number? | |
112 | */ | |
113 | underflow = exponent < 0; | |
114 | if (underflow) { | |
115 | significand = vfp_shiftright64jamming(significand, -exponent); | |
116 | exponent = 0; | |
117 | #ifdef DEBUG | |
118 | vd->exponent = exponent; | |
119 | vd->significand = significand; | |
120 | vfp_double_dump("pack: tiny number", vd); | |
121 | #endif | |
122 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) | |
123 | underflow = 0; | |
124 | } | |
125 | ||
126 | /* | |
127 | * Select rounding increment. | |
128 | */ | |
129 | incr = 0; | |
130 | rmode = fpscr & FPSCR_RMODE_MASK; | |
131 | ||
132 | if (rmode == FPSCR_ROUND_NEAREST) { | |
133 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; | |
134 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) | |
135 | incr -= 1; | |
136 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
137 | incr = 0; | |
138 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) | |
139 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; | |
140 | ||
141 | pr_debug("VFP: rounding increment = 0x%08llx\n", incr); | |
142 | ||
143 | /* | |
144 | * Is our rounding going to overflow? | |
145 | */ | |
146 | if ((significand + incr) < significand) { | |
147 | exponent += 1; | |
148 | significand = (significand >> 1) | (significand & 1); | |
149 | incr >>= 1; | |
150 | #ifdef DEBUG | |
151 | vd->exponent = exponent; | |
152 | vd->significand = significand; | |
153 | vfp_double_dump("pack: overflow", vd); | |
154 | #endif | |
155 | } | |
156 | ||
157 | /* | |
158 | * If any of the low bits (which will be shifted out of the | |
159 | * number) are non-zero, the result is inexact. | |
160 | */ | |
161 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) | |
162 | exceptions |= FPSCR_IXC; | |
163 | ||
164 | /* | |
165 | * Do our rounding. | |
166 | */ | |
167 | significand += incr; | |
168 | ||
169 | /* | |
170 | * Infinity? | |
171 | */ | |
172 | if (exponent >= 2046) { | |
173 | exceptions |= FPSCR_OFC | FPSCR_IXC; | |
174 | if (incr == 0) { | |
175 | vd->exponent = 2045; | |
176 | vd->significand = 0x7fffffffffffffffULL; | |
177 | } else { | |
178 | vd->exponent = 2047; /* infinity */ | |
179 | vd->significand = 0; | |
180 | } | |
181 | } else { | |
182 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) | |
183 | exponent = 0; | |
184 | if (exponent || significand > 0x8000000000000000ULL) | |
185 | underflow = 0; | |
186 | if (underflow) | |
187 | exceptions |= FPSCR_UFC; | |
188 | vd->exponent = exponent; | |
189 | vd->significand = significand >> 1; | |
190 | } | |
191 | ||
192 | pack: | |
193 | vfp_double_dump("pack: final", vd); | |
194 | { | |
195 | s64 d = vfp_double_pack(vd); | |
196 | pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, | |
197 | dd, d, exceptions); | |
0355b3e0 | 198 | vfp_put_double(d, dd); |
1da177e4 | 199 | } |
928bd1b4 | 200 | return exceptions; |
1da177e4 LT |
201 | } |
202 | ||
203 | /* | |
204 | * Propagate the NaN, setting exceptions if it is signalling. | |
205 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. | |
206 | */ | |
207 | static u32 | |
208 | vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, | |
209 | struct vfp_double *vdm, u32 fpscr) | |
210 | { | |
211 | struct vfp_double *nan; | |
212 | int tn, tm = 0; | |
213 | ||
214 | tn = vfp_double_type(vdn); | |
215 | ||
216 | if (vdm) | |
217 | tm = vfp_double_type(vdm); | |
218 | ||
219 | if (fpscr & FPSCR_DEFAULT_NAN) | |
220 | /* | |
221 | * Default NaN mode - always returns a quiet NaN | |
222 | */ | |
223 | nan = &vfp_double_default_qnan; | |
224 | else { | |
225 | /* | |
226 | * Contemporary mode - select the first signalling | |
227 | * NAN, or if neither are signalling, the first | |
228 | * quiet NAN. | |
229 | */ | |
230 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | |
231 | nan = vdn; | |
232 | else | |
233 | nan = vdm; | |
234 | /* | |
235 | * Make the NaN quiet. | |
236 | */ | |
237 | nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | |
238 | } | |
239 | ||
240 | *vdd = *nan; | |
241 | ||
242 | /* | |
243 | * If one was a signalling NAN, raise invalid operation. | |
244 | */ | |
245 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | |
246 | } | |
247 | ||
248 | /* | |
249 | * Extended operations | |
250 | */ | |
251 | static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) | |
252 | { | |
0355b3e0 | 253 | vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd); |
1da177e4 LT |
254 | return 0; |
255 | } | |
256 | ||
257 | static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) | |
258 | { | |
0355b3e0 | 259 | vfp_put_double(vfp_get_double(dm), dd); |
1da177e4 LT |
260 | return 0; |
261 | } | |
262 | ||
263 | static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) | |
264 | { | |
0355b3e0 | 265 | vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd); |
1da177e4 LT |
266 | return 0; |
267 | } | |
268 | ||
269 | static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr) | |
270 | { | |
271 | struct vfp_double vdm, vdd; | |
272 | int ret, tm; | |
273 | ||
274 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
275 | tm = vfp_double_type(&vdm); | |
276 | if (tm & (VFP_NAN|VFP_INFINITY)) { | |
277 | struct vfp_double *vdp = &vdd; | |
278 | ||
279 | if (tm & VFP_NAN) | |
280 | ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); | |
281 | else if (vdm.sign == 0) { | |
282 | sqrt_copy: | |
283 | vdp = &vdm; | |
284 | ret = 0; | |
285 | } else { | |
286 | sqrt_invalid: | |
287 | vdp = &vfp_double_default_qnan; | |
288 | ret = FPSCR_IOC; | |
289 | } | |
0355b3e0 | 290 | vfp_put_double(vfp_double_pack(vdp), dd); |
1da177e4 LT |
291 | return ret; |
292 | } | |
293 | ||
294 | /* | |
295 | * sqrt(+/- 0) == +/- 0 | |
296 | */ | |
297 | if (tm & VFP_ZERO) | |
298 | goto sqrt_copy; | |
299 | ||
300 | /* | |
301 | * Normalise a denormalised number | |
302 | */ | |
303 | if (tm & VFP_DENORMAL) | |
304 | vfp_double_normalise_denormal(&vdm); | |
305 | ||
306 | /* | |
307 | * sqrt(<0) = invalid | |
308 | */ | |
309 | if (vdm.sign) | |
310 | goto sqrt_invalid; | |
311 | ||
312 | vfp_double_dump("sqrt", &vdm); | |
313 | ||
314 | /* | |
315 | * Estimate the square root. | |
316 | */ | |
317 | vdd.sign = 0; | |
318 | vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; | |
319 | vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; | |
320 | ||
321 | vfp_double_dump("sqrt estimate1", &vdd); | |
322 | ||
323 | vdm.significand >>= 1 + (vdm.exponent & 1); | |
324 | vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); | |
325 | ||
326 | vfp_double_dump("sqrt estimate2", &vdd); | |
327 | ||
328 | /* | |
329 | * And now adjust. | |
330 | */ | |
331 | if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { | |
332 | if (vdd.significand < 2) { | |
333 | vdd.significand = ~0ULL; | |
334 | } else { | |
335 | u64 termh, terml, remh, reml; | |
336 | vdm.significand <<= 2; | |
337 | mul64to128(&termh, &terml, vdd.significand, vdd.significand); | |
338 | sub128(&remh, &reml, vdm.significand, 0, termh, terml); | |
339 | while ((s64)remh < 0) { | |
340 | vdd.significand -= 1; | |
341 | shift64left(&termh, &terml, vdd.significand); | |
342 | terml |= 1; | |
343 | add128(&remh, &reml, remh, reml, termh, terml); | |
344 | } | |
345 | vdd.significand |= (remh | reml) != 0; | |
346 | } | |
347 | } | |
348 | vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); | |
349 | ||
350 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt"); | |
351 | } | |
352 | ||
353 | /* | |
354 | * Equal := ZC | |
355 | * Less than := N | |
356 | * Greater than := C | |
357 | * Unordered := CV | |
358 | */ | |
359 | static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) | |
360 | { | |
361 | s64 d, m; | |
362 | u32 ret = 0; | |
363 | ||
364 | m = vfp_get_double(dm); | |
365 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | |
366 | ret |= FPSCR_C | FPSCR_V; | |
367 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | |
368 | /* | |
369 | * Signalling NaN, or signalling on quiet NaN | |
370 | */ | |
371 | ret |= FPSCR_IOC; | |
372 | } | |
373 | ||
374 | d = vfp_get_double(dd); | |
375 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | |
376 | ret |= FPSCR_C | FPSCR_V; | |
377 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | |
378 | /* | |
379 | * Signalling NaN, or signalling on quiet NaN | |
380 | */ | |
381 | ret |= FPSCR_IOC; | |
382 | } | |
383 | ||
384 | if (ret == 0) { | |
385 | if (d == m || vfp_double_packed_abs(d | m) == 0) { | |
386 | /* | |
387 | * equal | |
388 | */ | |
389 | ret |= FPSCR_Z | FPSCR_C; | |
390 | } else if (vfp_double_packed_sign(d ^ m)) { | |
391 | /* | |
392 | * different signs | |
393 | */ | |
394 | if (vfp_double_packed_sign(d)) | |
395 | /* | |
396 | * d is negative, so d < m | |
397 | */ | |
398 | ret |= FPSCR_N; | |
399 | else | |
400 | /* | |
401 | * d is positive, so d > m | |
402 | */ | |
403 | ret |= FPSCR_C; | |
404 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | |
405 | /* | |
406 | * d < m | |
407 | */ | |
408 | ret |= FPSCR_N; | |
409 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | |
410 | /* | |
411 | * d > m | |
412 | */ | |
413 | ret |= FPSCR_C; | |
414 | } | |
415 | } | |
416 | ||
417 | return ret; | |
418 | } | |
419 | ||
420 | static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) | |
421 | { | |
422 | return vfp_compare(dd, 0, dm, fpscr); | |
423 | } | |
424 | ||
425 | static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) | |
426 | { | |
427 | return vfp_compare(dd, 1, dm, fpscr); | |
428 | } | |
429 | ||
430 | static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) | |
431 | { | |
432 | return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); | |
433 | } | |
434 | ||
435 | static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) | |
436 | { | |
437 | return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); | |
438 | } | |
439 | ||
440 | static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) | |
441 | { | |
442 | struct vfp_double vdm; | |
443 | struct vfp_single vsd; | |
444 | int tm; | |
445 | u32 exceptions = 0; | |
446 | ||
447 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
448 | ||
449 | tm = vfp_double_type(&vdm); | |
450 | ||
451 | /* | |
452 | * If we have a signalling NaN, signal invalid operation. | |
453 | */ | |
454 | if (tm == VFP_SNAN) | |
455 | exceptions = FPSCR_IOC; | |
456 | ||
457 | if (tm & VFP_DENORMAL) | |
458 | vfp_double_normalise_denormal(&vdm); | |
459 | ||
460 | vsd.sign = vdm.sign; | |
461 | vsd.significand = vfp_hi64to32jamming(vdm.significand); | |
462 | ||
463 | /* | |
464 | * If we have an infinity or a NaN, the exponent must be 255 | |
465 | */ | |
466 | if (tm & (VFP_INFINITY|VFP_NAN)) { | |
467 | vsd.exponent = 255; | |
b53a2b41 | 468 | if (tm == VFP_QNAN) |
1da177e4 LT |
469 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; |
470 | goto pack_nan; | |
471 | } else if (tm & VFP_ZERO) | |
472 | vsd.exponent = 0; | |
473 | else | |
474 | vsd.exponent = vdm.exponent - (1023 - 127); | |
475 | ||
476 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); | |
477 | ||
478 | pack_nan: | |
0355b3e0 | 479 | vfp_put_float(vfp_single_pack(&vsd), sd); |
1da177e4 LT |
480 | return exceptions; |
481 | } | |
482 | ||
483 | static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) | |
484 | { | |
485 | struct vfp_double vdm; | |
486 | u32 m = vfp_get_float(dm); | |
487 | ||
488 | vdm.sign = 0; | |
489 | vdm.exponent = 1023 + 63 - 1; | |
490 | vdm.significand = (u64)m; | |
491 | ||
492 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); | |
493 | } | |
494 | ||
495 | static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) | |
496 | { | |
497 | struct vfp_double vdm; | |
498 | u32 m = vfp_get_float(dm); | |
499 | ||
500 | vdm.sign = (m & 0x80000000) >> 16; | |
501 | vdm.exponent = 1023 + 63 - 1; | |
502 | vdm.significand = vdm.sign ? -m : m; | |
503 | ||
504 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); | |
505 | } | |
506 | ||
507 | static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) | |
508 | { | |
509 | struct vfp_double vdm; | |
510 | u32 d, exceptions = 0; | |
511 | int rmode = fpscr & FPSCR_RMODE_MASK; | |
512 | int tm; | |
513 | ||
514 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
515 | ||
516 | /* | |
517 | * Do we have a denormalised number? | |
518 | */ | |
519 | tm = vfp_double_type(&vdm); | |
520 | if (tm & VFP_DENORMAL) | |
521 | exceptions |= FPSCR_IDC; | |
522 | ||
523 | if (tm & VFP_NAN) | |
524 | vdm.sign = 0; | |
525 | ||
526 | if (vdm.exponent >= 1023 + 32) { | |
527 | d = vdm.sign ? 0 : 0xffffffff; | |
528 | exceptions = FPSCR_IOC; | |
529 | } else if (vdm.exponent >= 1023 - 1) { | |
530 | int shift = 1023 + 63 - vdm.exponent; | |
531 | u64 rem, incr = 0; | |
532 | ||
533 | /* | |
534 | * 2^0 <= m < 2^32-2^8 | |
535 | */ | |
536 | d = (vdm.significand << 1) >> shift; | |
537 | rem = vdm.significand << (65 - shift); | |
538 | ||
539 | if (rmode == FPSCR_ROUND_NEAREST) { | |
540 | incr = 0x8000000000000000ULL; | |
541 | if ((d & 1) == 0) | |
542 | incr -= 1; | |
543 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
544 | incr = 0; | |
545 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | |
546 | incr = ~0ULL; | |
547 | } | |
548 | ||
549 | if ((rem + incr) < rem) { | |
550 | if (d < 0xffffffff) | |
551 | d += 1; | |
552 | else | |
553 | exceptions |= FPSCR_IOC; | |
554 | } | |
555 | ||
556 | if (d && vdm.sign) { | |
557 | d = 0; | |
558 | exceptions |= FPSCR_IOC; | |
559 | } else if (rem) | |
560 | exceptions |= FPSCR_IXC; | |
561 | } else { | |
562 | d = 0; | |
563 | if (vdm.exponent | vdm.significand) { | |
564 | exceptions |= FPSCR_IXC; | |
565 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | |
566 | d = 1; | |
567 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { | |
568 | d = 0; | |
569 | exceptions |= FPSCR_IOC; | |
570 | } | |
571 | } | |
572 | } | |
573 | ||
574 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | |
575 | ||
0355b3e0 | 576 | vfp_put_float(d, sd); |
1da177e4 LT |
577 | |
578 | return exceptions; | |
579 | } | |
580 | ||
581 | static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) | |
582 | { | |
583 | return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); | |
584 | } | |
585 | ||
586 | static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) | |
587 | { | |
588 | struct vfp_double vdm; | |
589 | u32 d, exceptions = 0; | |
590 | int rmode = fpscr & FPSCR_RMODE_MASK; | |
1320a80d | 591 | int tm; |
1da177e4 LT |
592 | |
593 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
594 | vfp_double_dump("VDM", &vdm); | |
595 | ||
596 | /* | |
597 | * Do we have denormalised number? | |
598 | */ | |
1320a80d CM |
599 | tm = vfp_double_type(&vdm); |
600 | if (tm & VFP_DENORMAL) | |
1da177e4 LT |
601 | exceptions |= FPSCR_IDC; |
602 | ||
1320a80d CM |
603 | if (tm & VFP_NAN) { |
604 | d = 0; | |
605 | exceptions |= FPSCR_IOC; | |
606 | } else if (vdm.exponent >= 1023 + 32) { | |
1da177e4 LT |
607 | d = 0x7fffffff; |
608 | if (vdm.sign) | |
609 | d = ~d; | |
610 | exceptions |= FPSCR_IOC; | |
611 | } else if (vdm.exponent >= 1023 - 1) { | |
612 | int shift = 1023 + 63 - vdm.exponent; /* 58 */ | |
613 | u64 rem, incr = 0; | |
614 | ||
615 | d = (vdm.significand << 1) >> shift; | |
616 | rem = vdm.significand << (65 - shift); | |
617 | ||
618 | if (rmode == FPSCR_ROUND_NEAREST) { | |
619 | incr = 0x8000000000000000ULL; | |
620 | if ((d & 1) == 0) | |
621 | incr -= 1; | |
622 | } else if (rmode == FPSCR_ROUND_TOZERO) { | |
623 | incr = 0; | |
624 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | |
625 | incr = ~0ULL; | |
626 | } | |
627 | ||
628 | if ((rem + incr) < rem && d < 0xffffffff) | |
629 | d += 1; | |
630 | if (d > 0x7fffffff + (vdm.sign != 0)) { | |
631 | d = 0x7fffffff + (vdm.sign != 0); | |
632 | exceptions |= FPSCR_IOC; | |
633 | } else if (rem) | |
634 | exceptions |= FPSCR_IXC; | |
635 | ||
636 | if (vdm.sign) | |
637 | d = -d; | |
638 | } else { | |
639 | d = 0; | |
640 | if (vdm.exponent | vdm.significand) { | |
641 | exceptions |= FPSCR_IXC; | |
642 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | |
643 | d = 1; | |
644 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) | |
645 | d = -1; | |
646 | } | |
647 | } | |
648 | ||
649 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | |
650 | ||
0355b3e0 | 651 | vfp_put_float((s32)d, sd); |
1da177e4 LT |
652 | |
653 | return exceptions; | |
654 | } | |
655 | ||
656 | static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) | |
657 | { | |
658 | return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); | |
659 | } | |
660 | ||
661 | ||
4cc9bd2e | 662 | static struct op fops_ext[32] = { |
baf97ce6 RK |
663 | [FEXT_TO_IDX(FEXT_FCPY)] = { vfp_double_fcpy, 0 }, |
664 | [FEXT_TO_IDX(FEXT_FABS)] = { vfp_double_fabs, 0 }, | |
665 | [FEXT_TO_IDX(FEXT_FNEG)] = { vfp_double_fneg, 0 }, | |
666 | [FEXT_TO_IDX(FEXT_FSQRT)] = { vfp_double_fsqrt, 0 }, | |
667 | [FEXT_TO_IDX(FEXT_FCMP)] = { vfp_double_fcmp, OP_SCALAR }, | |
668 | [FEXT_TO_IDX(FEXT_FCMPE)] = { vfp_double_fcmpe, OP_SCALAR }, | |
669 | [FEXT_TO_IDX(FEXT_FCMPZ)] = { vfp_double_fcmpz, OP_SCALAR }, | |
670 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = { vfp_double_fcmpez, OP_SCALAR }, | |
671 | [FEXT_TO_IDX(FEXT_FCVT)] = { vfp_double_fcvts, OP_SCALAR|OP_SD }, | |
672 | [FEXT_TO_IDX(FEXT_FUITO)] = { vfp_double_fuito, OP_SCALAR }, | |
673 | [FEXT_TO_IDX(FEXT_FSITO)] = { vfp_double_fsito, OP_SCALAR }, | |
674 | [FEXT_TO_IDX(FEXT_FTOUI)] = { vfp_double_ftoui, OP_SCALAR|OP_SD }, | |
675 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = { vfp_double_ftouiz, OP_SCALAR|OP_SD }, | |
676 | [FEXT_TO_IDX(FEXT_FTOSI)] = { vfp_double_ftosi, OP_SCALAR|OP_SD }, | |
677 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = { vfp_double_ftosiz, OP_SCALAR|OP_SD }, | |
1da177e4 LT |
678 | }; |
679 | ||
680 | ||
681 | ||
682 | ||
683 | static u32 | |
684 | vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, | |
685 | struct vfp_double *vdm, u32 fpscr) | |
686 | { | |
687 | struct vfp_double *vdp; | |
688 | u32 exceptions = 0; | |
689 | int tn, tm; | |
690 | ||
691 | tn = vfp_double_type(vdn); | |
692 | tm = vfp_double_type(vdm); | |
693 | ||
694 | if (tn & tm & VFP_INFINITY) { | |
695 | /* | |
696 | * Two infinities. Are they different signs? | |
697 | */ | |
698 | if (vdn->sign ^ vdm->sign) { | |
699 | /* | |
700 | * different signs -> invalid | |
701 | */ | |
702 | exceptions = FPSCR_IOC; | |
703 | vdp = &vfp_double_default_qnan; | |
704 | } else { | |
705 | /* | |
706 | * same signs -> valid | |
707 | */ | |
708 | vdp = vdn; | |
709 | } | |
710 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | |
711 | /* | |
712 | * One infinity and one number -> infinity | |
713 | */ | |
714 | vdp = vdn; | |
715 | } else { | |
716 | /* | |
717 | * 'n' is a NaN of some type | |
718 | */ | |
719 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | |
720 | } | |
721 | *vdd = *vdp; | |
722 | return exceptions; | |
723 | } | |
724 | ||
725 | static u32 | |
726 | vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, | |
727 | struct vfp_double *vdm, u32 fpscr) | |
728 | { | |
729 | u32 exp_diff; | |
730 | u64 m_sig; | |
731 | ||
732 | if (vdn->significand & (1ULL << 63) || | |
733 | vdm->significand & (1ULL << 63)) { | |
734 | pr_info("VFP: bad FP values in %s\n", __func__); | |
735 | vfp_double_dump("VDN", vdn); | |
736 | vfp_double_dump("VDM", vdm); | |
737 | } | |
738 | ||
739 | /* | |
740 | * Ensure that 'n' is the largest magnitude number. Note that | |
741 | * if 'n' and 'm' have equal exponents, we do not swap them. | |
742 | * This ensures that NaN propagation works correctly. | |
743 | */ | |
744 | if (vdn->exponent < vdm->exponent) { | |
745 | struct vfp_double *t = vdn; | |
746 | vdn = vdm; | |
747 | vdm = t; | |
748 | } | |
749 | ||
750 | /* | |
751 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, | |
752 | * infinity or a NaN here. | |
753 | */ | |
754 | if (vdn->exponent == 2047) | |
755 | return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); | |
756 | ||
757 | /* | |
758 | * We have two proper numbers, where 'vdn' is the larger magnitude. | |
759 | * | |
760 | * Copy 'n' to 'd' before doing the arithmetic. | |
761 | */ | |
762 | *vdd = *vdn; | |
763 | ||
764 | /* | |
765 | * Align 'm' with the result. | |
766 | */ | |
767 | exp_diff = vdn->exponent - vdm->exponent; | |
768 | m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); | |
769 | ||
770 | /* | |
771 | * If the signs are different, we are really subtracting. | |
772 | */ | |
773 | if (vdn->sign ^ vdm->sign) { | |
774 | m_sig = vdn->significand - m_sig; | |
775 | if ((s64)m_sig < 0) { | |
776 | vdd->sign = vfp_sign_negate(vdd->sign); | |
777 | m_sig = -m_sig; | |
7b1fbf29 CM |
778 | } else if (m_sig == 0) { |
779 | vdd->sign = (fpscr & FPSCR_RMODE_MASK) == | |
780 | FPSCR_ROUND_MINUSINF ? 0x8000 : 0; | |
1da177e4 LT |
781 | } |
782 | } else { | |
783 | m_sig += vdn->significand; | |
784 | } | |
785 | vdd->significand = m_sig; | |
786 | ||
787 | return 0; | |
788 | } | |
789 | ||
790 | static u32 | |
791 | vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, | |
792 | struct vfp_double *vdm, u32 fpscr) | |
793 | { | |
794 | vfp_double_dump("VDN", vdn); | |
795 | vfp_double_dump("VDM", vdm); | |
796 | ||
797 | /* | |
798 | * Ensure that 'n' is the largest magnitude number. Note that | |
799 | * if 'n' and 'm' have equal exponents, we do not swap them. | |
800 | * This ensures that NaN propagation works correctly. | |
801 | */ | |
802 | if (vdn->exponent < vdm->exponent) { | |
803 | struct vfp_double *t = vdn; | |
804 | vdn = vdm; | |
805 | vdm = t; | |
806 | pr_debug("VFP: swapping M <-> N\n"); | |
807 | } | |
808 | ||
809 | vdd->sign = vdn->sign ^ vdm->sign; | |
810 | ||
811 | /* | |
812 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. | |
813 | */ | |
814 | if (vdn->exponent == 2047) { | |
815 | if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) | |
816 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | |
817 | if ((vdm->exponent | vdm->significand) == 0) { | |
818 | *vdd = vfp_double_default_qnan; | |
819 | return FPSCR_IOC; | |
820 | } | |
821 | vdd->exponent = vdn->exponent; | |
822 | vdd->significand = 0; | |
823 | return 0; | |
824 | } | |
825 | ||
826 | /* | |
827 | * If 'm' is zero, the result is always zero. In this case, | |
828 | * 'n' may be zero or a number, but it doesn't matter which. | |
829 | */ | |
830 | if ((vdm->exponent | vdm->significand) == 0) { | |
831 | vdd->exponent = 0; | |
832 | vdd->significand = 0; | |
833 | return 0; | |
834 | } | |
835 | ||
836 | /* | |
837 | * We add 2 to the destination exponent for the same reason | |
838 | * as the addition case - though this time we have +1 from | |
839 | * each input operand. | |
840 | */ | |
841 | vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; | |
842 | vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); | |
843 | ||
844 | vfp_double_dump("VDD", vdd); | |
845 | return 0; | |
846 | } | |
847 | ||
848 | #define NEG_MULTIPLY (1 << 0) | |
849 | #define NEG_SUBTRACT (1 << 1) | |
850 | ||
851 | static u32 | |
852 | vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) | |
853 | { | |
854 | struct vfp_double vdd, vdp, vdn, vdm; | |
855 | u32 exceptions; | |
856 | ||
857 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
858 | if (vdn.exponent == 0 && vdn.significand) | |
859 | vfp_double_normalise_denormal(&vdn); | |
860 | ||
861 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
862 | if (vdm.exponent == 0 && vdm.significand) | |
863 | vfp_double_normalise_denormal(&vdm); | |
864 | ||
865 | exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); | |
866 | if (negate & NEG_MULTIPLY) | |
867 | vdp.sign = vfp_sign_negate(vdp.sign); | |
868 | ||
869 | vfp_double_unpack(&vdn, vfp_get_double(dd)); | |
870 | if (negate & NEG_SUBTRACT) | |
871 | vdn.sign = vfp_sign_negate(vdn.sign); | |
872 | ||
873 | exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); | |
874 | ||
875 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); | |
876 | } | |
877 | ||
878 | /* | |
879 | * Standard operations | |
880 | */ | |
881 | ||
882 | /* | |
883 | * sd = sd + (sn * sm) | |
884 | */ | |
885 | static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) | |
886 | { | |
887 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); | |
888 | } | |
889 | ||
890 | /* | |
891 | * sd = sd - (sn * sm) | |
892 | */ | |
893 | static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) | |
894 | { | |
895 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); | |
896 | } | |
897 | ||
898 | /* | |
899 | * sd = -sd + (sn * sm) | |
900 | */ | |
901 | static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) | |
902 | { | |
903 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); | |
904 | } | |
905 | ||
906 | /* | |
907 | * sd = -sd - (sn * sm) | |
908 | */ | |
909 | static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) | |
910 | { | |
911 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | |
912 | } | |
913 | ||
914 | /* | |
915 | * sd = sn * sm | |
916 | */ | |
917 | static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) | |
918 | { | |
919 | struct vfp_double vdd, vdn, vdm; | |
920 | u32 exceptions; | |
921 | ||
922 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
923 | if (vdn.exponent == 0 && vdn.significand) | |
924 | vfp_double_normalise_denormal(&vdn); | |
925 | ||
926 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
927 | if (vdm.exponent == 0 && vdm.significand) | |
928 | vfp_double_normalise_denormal(&vdm); | |
929 | ||
930 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | |
931 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); | |
932 | } | |
933 | ||
934 | /* | |
935 | * sd = -(sn * sm) | |
936 | */ | |
937 | static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) | |
938 | { | |
939 | struct vfp_double vdd, vdn, vdm; | |
940 | u32 exceptions; | |
941 | ||
942 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
943 | if (vdn.exponent == 0 && vdn.significand) | |
944 | vfp_double_normalise_denormal(&vdn); | |
945 | ||
946 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
947 | if (vdm.exponent == 0 && vdm.significand) | |
948 | vfp_double_normalise_denormal(&vdm); | |
949 | ||
950 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | |
951 | vdd.sign = vfp_sign_negate(vdd.sign); | |
952 | ||
953 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); | |
954 | } | |
955 | ||
956 | /* | |
957 | * sd = sn + sm | |
958 | */ | |
959 | static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) | |
960 | { | |
961 | struct vfp_double vdd, vdn, vdm; | |
962 | u32 exceptions; | |
963 | ||
964 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
965 | if (vdn.exponent == 0 && vdn.significand) | |
966 | vfp_double_normalise_denormal(&vdn); | |
967 | ||
968 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
969 | if (vdm.exponent == 0 && vdm.significand) | |
970 | vfp_double_normalise_denormal(&vdm); | |
971 | ||
972 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | |
973 | ||
974 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); | |
975 | } | |
976 | ||
977 | /* | |
978 | * sd = sn - sm | |
979 | */ | |
980 | static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) | |
981 | { | |
982 | struct vfp_double vdd, vdn, vdm; | |
983 | u32 exceptions; | |
984 | ||
985 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
986 | if (vdn.exponent == 0 && vdn.significand) | |
987 | vfp_double_normalise_denormal(&vdn); | |
988 | ||
989 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
990 | if (vdm.exponent == 0 && vdm.significand) | |
991 | vfp_double_normalise_denormal(&vdm); | |
992 | ||
993 | /* | |
994 | * Subtraction is like addition, but with a negated operand. | |
995 | */ | |
996 | vdm.sign = vfp_sign_negate(vdm.sign); | |
997 | ||
998 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | |
999 | ||
1000 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); | |
1001 | } | |
1002 | ||
1003 | /* | |
1004 | * sd = sn / sm | |
1005 | */ | |
1006 | static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) | |
1007 | { | |
1008 | struct vfp_double vdd, vdn, vdm; | |
1009 | u32 exceptions = 0; | |
1010 | int tm, tn; | |
1011 | ||
1012 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | |
1013 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | |
1014 | ||
1015 | vdd.sign = vdn.sign ^ vdm.sign; | |
1016 | ||
1017 | tn = vfp_double_type(&vdn); | |
1018 | tm = vfp_double_type(&vdm); | |
1019 | ||
1020 | /* | |
1021 | * Is n a NAN? | |
1022 | */ | |
1023 | if (tn & VFP_NAN) | |
1024 | goto vdn_nan; | |
1025 | ||
1026 | /* | |
1027 | * Is m a NAN? | |
1028 | */ | |
1029 | if (tm & VFP_NAN) | |
1030 | goto vdm_nan; | |
1031 | ||
1032 | /* | |
1033 | * If n and m are infinity, the result is invalid | |
1034 | * If n and m are zero, the result is invalid | |
1035 | */ | |
1036 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | |
1037 | goto invalid; | |
1038 | ||
1039 | /* | |
1040 | * If n is infinity, the result is infinity | |
1041 | */ | |
1042 | if (tn & VFP_INFINITY) | |
1043 | goto infinity; | |
1044 | ||
1045 | /* | |
1046 | * If m is zero, raise div0 exceptions | |
1047 | */ | |
1048 | if (tm & VFP_ZERO) | |
1049 | goto divzero; | |
1050 | ||
1051 | /* | |
1052 | * If m is infinity, or n is zero, the result is zero | |
1053 | */ | |
1054 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | |
1055 | goto zero; | |
1056 | ||
1057 | if (tn & VFP_DENORMAL) | |
1058 | vfp_double_normalise_denormal(&vdn); | |
1059 | if (tm & VFP_DENORMAL) | |
1060 | vfp_double_normalise_denormal(&vdm); | |
1061 | ||
1062 | /* | |
1063 | * Ok, we have two numbers, we can perform division. | |
1064 | */ | |
1065 | vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; | |
1066 | vdm.significand <<= 1; | |
1067 | if (vdm.significand <= (2 * vdn.significand)) { | |
1068 | vdn.significand >>= 1; | |
1069 | vdd.exponent++; | |
1070 | } | |
1071 | vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); | |
1072 | if ((vdd.significand & 0x1ff) <= 2) { | |
1073 | u64 termh, terml, remh, reml; | |
1074 | mul64to128(&termh, &terml, vdm.significand, vdd.significand); | |
1075 | sub128(&remh, &reml, vdn.significand, 0, termh, terml); | |
1076 | while ((s64)remh < 0) { | |
1077 | vdd.significand -= 1; | |
1078 | add128(&remh, &reml, remh, reml, 0, vdm.significand); | |
1079 | } | |
1080 | vdd.significand |= (reml != 0); | |
1081 | } | |
1082 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); | |
1083 | ||
1084 | vdn_nan: | |
1085 | exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); | |
1086 | pack: | |
0355b3e0 | 1087 | vfp_put_double(vfp_double_pack(&vdd), dd); |
1da177e4 LT |
1088 | return exceptions; |
1089 | ||
1090 | vdm_nan: | |
1091 | exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); | |
1092 | goto pack; | |
1093 | ||
1094 | zero: | |
1095 | vdd.exponent = 0; | |
1096 | vdd.significand = 0; | |
1097 | goto pack; | |
1098 | ||
1099 | divzero: | |
1100 | exceptions = FPSCR_DZC; | |
1101 | infinity: | |
1102 | vdd.exponent = 2047; | |
1103 | vdd.significand = 0; | |
1104 | goto pack; | |
1105 | ||
1106 | invalid: | |
0355b3e0 | 1107 | vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd); |
1da177e4 LT |
1108 | return FPSCR_IOC; |
1109 | } | |
1110 | ||
4cc9bd2e | 1111 | static struct op fops[16] = { |
baf97ce6 RK |
1112 | [FOP_TO_IDX(FOP_FMAC)] = { vfp_double_fmac, 0 }, |
1113 | [FOP_TO_IDX(FOP_FNMAC)] = { vfp_double_fnmac, 0 }, | |
1114 | [FOP_TO_IDX(FOP_FMSC)] = { vfp_double_fmsc, 0 }, | |
1115 | [FOP_TO_IDX(FOP_FNMSC)] = { vfp_double_fnmsc, 0 }, | |
1116 | [FOP_TO_IDX(FOP_FMUL)] = { vfp_double_fmul, 0 }, | |
1117 | [FOP_TO_IDX(FOP_FNMUL)] = { vfp_double_fnmul, 0 }, | |
1118 | [FOP_TO_IDX(FOP_FADD)] = { vfp_double_fadd, 0 }, | |
1119 | [FOP_TO_IDX(FOP_FSUB)] = { vfp_double_fsub, 0 }, | |
1120 | [FOP_TO_IDX(FOP_FDIV)] = { vfp_double_fdiv, 0 }, | |
1da177e4 LT |
1121 | }; |
1122 | ||
1123 | #define FREG_BANK(x) ((x) & 0x0c) | |
1124 | #define FREG_IDX(x) ((x) & 3) | |
1125 | ||
1126 | u32 vfp_double_cpdo(u32 inst, u32 fpscr) | |
1127 | { | |
1128 | u32 op = inst & FOP_MASK; | |
1129 | u32 exceptions = 0; | |
c29ecac1 | 1130 | unsigned int dest; |
1356c194 CM |
1131 | unsigned int dn = vfp_get_dn(inst); |
1132 | unsigned int dm = vfp_get_dm(inst); | |
1da177e4 | 1133 | unsigned int vecitr, veclen, vecstride; |
4cc9bd2e | 1134 | struct op *fop; |
1da177e4 | 1135 | |
1da177e4 LT |
1136 | vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2; |
1137 | ||
4cc9bd2e | 1138 | fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)]; |
baf97ce6 | 1139 | |
c29ecac1 DJ |
1140 | /* |
1141 | * fcvtds takes an sN register number as destination, not dN. | |
1142 | * It also always operates on scalars. | |
1143 | */ | |
4cc9bd2e | 1144 | if (fop->flags & OP_SD) |
c29ecac1 | 1145 | dest = vfp_get_sd(inst); |
4cc9bd2e | 1146 | else |
c29ecac1 DJ |
1147 | dest = vfp_get_dd(inst); |
1148 | ||
1da177e4 LT |
1149 | /* |
1150 | * If destination bank is zero, vector length is always '1'. | |
1151 | * ARM DDI0100F C5.1.3, C5.3.2. | |
1152 | */ | |
4cc9bd2e | 1153 | if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0)) |
1da177e4 | 1154 | veclen = 0; |
4cc9bd2e GF |
1155 | else |
1156 | veclen = fpscr & FPSCR_LENGTH_MASK; | |
1da177e4 LT |
1157 | |
1158 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | |
1159 | (veclen >> FPSCR_LENGTH_BIT) + 1); | |
1160 | ||
4cc9bd2e | 1161 | if (!fop->fn) |
1da177e4 LT |
1162 | goto invalid; |
1163 | ||
1164 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | |
1165 | u32 except; | |
baf97ce6 | 1166 | char type; |
1da177e4 | 1167 | |
baf97ce6 RK |
1168 | type = fop->flags & OP_SD ? 's' : 'd'; |
1169 | if (op == FOP_EXT) | |
1170 | pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n", | |
1da177e4 | 1171 | vecitr >> FPSCR_LENGTH_BIT, |
baf97ce6 | 1172 | type, dest, dn, dm); |
1da177e4 | 1173 | else |
baf97ce6 | 1174 | pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n", |
1da177e4 | 1175 | vecitr >> FPSCR_LENGTH_BIT, |
baf97ce6 | 1176 | type, dest, dn, FOP_TO_IDX(op), dm); |
1da177e4 | 1177 | |
4cc9bd2e | 1178 | except = fop->fn(dest, dn, dm, fpscr); |
1da177e4 LT |
1179 | pr_debug("VFP: itr%d: exceptions=%08x\n", |
1180 | vecitr >> FPSCR_LENGTH_BIT, except); | |
1181 | ||
1182 | exceptions |= except; | |
1183 | ||
1da177e4 LT |
1184 | /* |
1185 | * CHECK: It appears to be undefined whether we stop when | |
1186 | * we encounter an exception. We continue. | |
1187 | */ | |
c29ecac1 | 1188 | dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 6); |
1da177e4 LT |
1189 | dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6); |
1190 | if (FREG_BANK(dm) != 0) | |
1191 | dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6); | |
1192 | } | |
1193 | return exceptions; | |
1194 | ||
1195 | invalid: | |
1196 | return ~0; | |
1197 | } |