sh: SH-2A FPU support.
[deliverable/linux.git] / arch / sh / kernel / cpu / sh2a / fpu.c
1 /*
2 * Save/restore floating point context for signal handlers.
3 *
4 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 *
10 * FIXME! These routines can be optimized in big endian case.
11 */
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
16
17 /* The PR (precision) bit in the FP Status Register must be clear when
18 * an frchg instruction is executed, otherwise the instruction is undefined.
19 * Executing frchg with PR set causes a trap on some SH4 implementations.
20 */
21
/*
 * Value handed to the FPU save/restore/init asm blocks in this file as an
 * extra input operand.  It is zero here.
 */
#define FPSCR_RCHG	0x00000000
23
24
25 /*
26 * Save FPU registers onto task structure.
27 * Assume called with FPU enabled (SR.FD=0).
28 */
29 void
30 save_fpu(struct task_struct *tsk, struct pt_regs *regs)
31 {
32 unsigned long dummy;
33
34 clear_tsk_thread_flag(tsk, TIF_USEDFPU);
35 enable_fpu();
36 asm volatile("sts.l fpul, @-%0\n\t"
37 "sts.l fpscr, @-%0\n\t"
38 "fmov.s fr15, @-%0\n\t"
39 "fmov.s fr14, @-%0\n\t"
40 "fmov.s fr13, @-%0\n\t"
41 "fmov.s fr12, @-%0\n\t"
42 "fmov.s fr11, @-%0\n\t"
43 "fmov.s fr10, @-%0\n\t"
44 "fmov.s fr9, @-%0\n\t"
45 "fmov.s fr8, @-%0\n\t"
46 "fmov.s fr7, @-%0\n\t"
47 "fmov.s fr6, @-%0\n\t"
48 "fmov.s fr5, @-%0\n\t"
49 "fmov.s fr4, @-%0\n\t"
50 "fmov.s fr3, @-%0\n\t"
51 "fmov.s fr2, @-%0\n\t"
52 "fmov.s fr1, @-%0\n\t"
53 "fmov.s fr0, @-%0\n\t"
54 "lds %3, fpscr\n\t"
55 : "=r" (dummy)
56 : "0" ((char *)(&tsk->thread.fpu.hard.status)),
57 "r" (FPSCR_RCHG),
58 "r" (FPSCR_INIT)
59 : "memory");
60
61 disable_fpu();
62 release_fpu(regs);
63 }
64
65 static void
66 restore_fpu(struct task_struct *tsk)
67 {
68 unsigned long dummy;
69
70 enable_fpu();
71 asm volatile("fmov.s @%0+, fr0\n\t"
72 "fmov.s @%0+, fr1\n\t"
73 "fmov.s @%0+, fr2\n\t"
74 "fmov.s @%0+, fr3\n\t"
75 "fmov.s @%0+, fr4\n\t"
76 "fmov.s @%0+, fr5\n\t"
77 "fmov.s @%0+, fr6\n\t"
78 "fmov.s @%0+, fr7\n\t"
79 "fmov.s @%0+, fr8\n\t"
80 "fmov.s @%0+, fr9\n\t"
81 "fmov.s @%0+, fr10\n\t"
82 "fmov.s @%0+, fr11\n\t"
83 "fmov.s @%0+, fr12\n\t"
84 "fmov.s @%0+, fr13\n\t"
85 "fmov.s @%0+, fr14\n\t"
86 "fmov.s @%0+, fr15\n\t"
87 "lds.l @%0+, fpscr\n\t"
88 "lds.l @%0+, fpul\n\t"
89 : "=r" (dummy)
90 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
91 : "memory");
92 disable_fpu();
93 }
94
/*
 * Load the FPU with signalling NANs.  The bit pattern we're using has the
 * property that, no matter whether it is considered as single or as double
 * precision, it represents a signalling NAN.
 */
100
101 static void
102 fpu_init(void)
103 {
104 enable_fpu();
105 asm volatile("lds %0, fpul\n\t"
106 "fsts fpul, fr0\n\t"
107 "fsts fpul, fr1\n\t"
108 "fsts fpul, fr2\n\t"
109 "fsts fpul, fr3\n\t"
110 "fsts fpul, fr4\n\t"
111 "fsts fpul, fr5\n\t"
112 "fsts fpul, fr6\n\t"
113 "fsts fpul, fr7\n\t"
114 "fsts fpul, fr8\n\t"
115 "fsts fpul, fr9\n\t"
116 "fsts fpul, fr10\n\t"
117 "fsts fpul, fr11\n\t"
118 "fsts fpul, fr12\n\t"
119 "fsts fpul, fr13\n\t"
120 "fsts fpul, fr14\n\t"
121 "fsts fpul, fr15\n\t"
122 "lds %2, fpscr\n\t"
123 : /* no output */
124 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
125 disable_fpu();
126 }
127
128 /*
129 * Emulate arithmetic ops on denormalized number for some FPU insns.
130 */
131
132 /* denormalized float * float */
/*
 * Multiply two single-precision bit patterns where @hx is the denormal
 * operand and @hy the normalised one.
 *
 * Returns the bit pattern of the product, truncated (no guard bits, no
 * rounding).  A signed zero is returned when @hx is zero or when @hy is
 * itself below the smallest normalised magnitude.
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int mag_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned long long prod, t;
	unsigned int res;
	int e, top;

	if (mag_x == 0 || mag_y < 0x00800000)
		return sign;

	/* Biased exponent of y; x contributes mantissa bits only. */
	e = (mag_y & 0x7f800000) >> 23;
	prod = (unsigned long long)(mag_x & 0x007fffff) *
	       ((mag_y & 0x007fffff) | 0x00800000);

	/* top = index of the most significant set bit of the product. */
	top = -1;
	for (t = prod; t != 0; t >>= 1)
		top++;

	/* FIXME: use guard bits */
	e += top - 126 - 46;
	if (e > 0)		/* normalised result */
		res = ((int) (prod >> (top - 23)) & 0x007fffff) | (e << 23);
	else if (e >= -22)	/* still representable as a denormal */
		res = (int) (prod >> (top - 22 - e)) & 0x007fffff;
	else			/* underflows to zero */
		res = 0;

	return res | sign;
}
164
165 /* denormalized double * double */
/*
 * Full 64 x 64 -> 128 bit unsigned multiply.
 *
 * @x, @y:  the factors
 * @highp:  receives bits 127..64 of the product
 * @lowp:   receives bits 63..0 of the product
 *
 * Built from four 32 x 32 partial products; carries out of the low word
 * are detected by unsigned wrap-around.
 */
static void mult64(unsigned long long x, unsigned long long y,
		   unsigned long long *highp, unsigned long long *lowp)
{
	const unsigned long long x_hi = x >> 32;
	const unsigned long long x_lo = x & 0xffffffffLL;
	const unsigned long long y_hi = y >> 32;
	const unsigned long long y_lo = y & 0xffffffffLL;

	const unsigned long long hi_hi = x_hi * y_hi;
	const unsigned long long lo_hi = x_lo * y_hi;
	const unsigned long long hi_lo = x_hi * y_lo;
	const unsigned long long lo_lo = x_lo * y_lo;

	unsigned long long upper = 0LL;
	unsigned long long partial, lower;

	/* Fold the low halves of both cross terms into the low word. */
	partial = lo_lo + (lo_hi << 32);
	if (partial < lo_lo)
		upper++;
	lower = partial + (hi_lo << 32);
	if (lower < partial)
		upper++;

	/* The high halves of the cross terms belong to the upper word. */
	upper += (lo_hi >> 32) + (hi_lo >> 32);
	upper += hi_hi;

	*lowp = lower;
	*highp = upper;
}
191
/*
 * Logical right shift of the 128-bit value (@mh:@ml) by @n bits, returning
 * the low 64 bits of the result.
 *
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count, expected to be non-negative
 *
 * Shift counts of 0 and of 128 or more are handled explicitly: both would
 * otherwise shift a 64-bit operand by its full width or more, which is
 * undefined in C.
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	if (n == 0)
		return ml;
	return (mh << (64 - n)) | (ml >> n);
}
199
/*
 * Multiply two double-precision bit patterns where @hx is the denormal
 * operand and @hy the normalised one.
 *
 * Returns the bit pattern of the product, truncated (no guard bits, no
 * rounding).  A signed zero is returned when @hx is zero or when @hy is
 * itself below the smallest normalised magnitude.
 */
static long long denormal_muld(long long hx, long long hy)
{
	const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long mag_x = hx & 0x7fffffffffffffffLL;
	unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, t, res;
	int e, top;

	if (mag_x == 0 || mag_y < 0x0010000000000000LL)
		return sign;

	/* Biased exponent of y; x contributes mantissa bits only. */
	e = (mag_y & 0x7ff0000000000000LL) >> 52;
	mult64(mag_x & 0x000fffffffffffffLL,
	       (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL,
	       &prod_hi, &prod_lo);

	/* top = index of the most significant set bit of the 128-bit product. */
	top = -1;
	if (prod_hi) {
		for (t = prod_hi; t; t >>= 1)
			top++;
		top += 64;
	} else {
		for (t = prod_lo; t; t >>= 1)
			top++;
	}

	/* FIXME: use guard bits */
	e += top - 1022 - 52 * 2;
	if (e > 0)		/* normalised result */
		res = (rshift64(prod_hi, prod_lo, top - 52)
		       & 0x000fffffffffffffLL)
			| ((long long)e << 52);
	else if (e >= -51)	/* still representable as a denormal */
		res = rshift64(prod_hi, prod_lo, top - 51 - e)
			& 0x000fffffffffffffLL;
	else			/* underflows to zero */
		res = 0;

	return res | sign;
}
237
238 /* ix - iy where iy: denormal and ix, iy >= 0 */
/*
 * Single-precision magnitude subtraction @ix - @iy, where @iy is a
 * denormal and both arguments are sign-less bit patterns.
 *
 * When @ix is denormal too, the plain integer difference is returned and
 * may be negative.  Otherwise @iy is aligned to @ix's exponent by a plain
 * right shift (bits shifted out are dropped) and the result renormalised.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;
	int shift;

	if (ix < 0x00800000)
		return ix - iy;

	e = (ix & 0x7f800000) >> 23;
	shift = e - 1;
	if (shift > 31)		/* iy is far too small to matter */
		return ix;
	iy >>= shift;
	if (!iy)
		return ix;

	mant = (ix & 0x007fffff) | 0x00800000;
	mant -= iy;

	/* Shift left until the implicit bit is back, or we hit exponent 0. */
	while (mant < 0x00800000) {
		e--;
		if (e == 0)
			return mant;
		mant <<= 1;
	}

	return (e << 23) | (mant & 0x007fffff);
}
264
265 /* ix + iy where iy: denormal and ix, iy >= 0 */
/*
 * Single-precision magnitude addition @ix + @iy, where @iy is a denormal
 * and both arguments are sign-less bit patterns.
 *
 * When @ix is denormal too, the plain integer sum is returned.  Otherwise
 * @iy is aligned to @ix's exponent by a plain right shift (bits shifted
 * out are dropped) and a carry out of the mantissa bumps the exponent.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;
	int shift;

	if (ix < 0x00800000)
		return ix + iy;

	e = (ix & 0x7f800000) >> 23;
	shift = e - 1;
	if (shift > 31)		/* iy is far too small to matter */
		return ix;
	iy >>= shift;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) + iy;
	if (mant >= 0x01000000) {
		mant >>= 1;
		e++;
	}

	return (e << 23) | (mant & 0x007fffff);
}
290
/*
 * Single-precision addition @hx + @hy on raw bit patterns, for the case
 * where at least one operand is a denormal.
 *
 * The operands are split into sign and magnitude.  Equal signs add the
 * magnitudes, differing signs subtract them; in both cases the denormal
 * magnitude is passed as the second argument of the helper.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix, iy;
	int sign;

	sign = hx & 0x80000000;
	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;

	if ((hx ^ hy) & 0x80000000) {
		/* Opposite signs: subtract the magnitudes. */
		if (iy < 0x00800000) {
			/*
			 * denormal_subf1() returns a negative value when
			 * both magnitudes are denormal and |hx| < |hy|.
			 * Keep the result signed so that case is actually
			 * detected; comparing an unsigned copy against 0
			 * never is.
			 */
			int diff = denormal_subf1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x80000000;
			}
			ix = diff;
		} else {
			/* hy is the normalised one: |hy| - |hx|, hy's sign. */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		/* Same sign: add the magnitudes, sign is unchanged. */
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}
322
323 /* ix - iy where iy: denormal and ix, iy >= 0 */
/*
 * Double-precision magnitude subtraction @ix - @iy, where @iy is a
 * denormal and both arguments are sign-less bit patterns.
 *
 * When @ix is denormal too, the plain integer difference is returned and
 * may be negative.  Otherwise @iy is aligned to @ix's exponent by a plain
 * right shift (bits shifted out are dropped) and the result renormalised.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	int e;
	int shift;

	if (ix < 0x0010000000000000LL)
		return ix - iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	shift = e - 1;
	if (shift > 63)		/* iy is far too small to matter */
		return ix;
	iy >>= shift;
	if (!iy)
		return ix;

	mant = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mant -= iy;

	/* Shift left until the implicit bit is back, or we hit exponent 0. */
	while (mant < 0x0010000000000000LL) {
		e--;
		if (e == 0)
			return mant;
		mant <<= 1;
	}

	return ((long long)e << 52) | (mant & 0x000fffffffffffffLL);
}
349
350 /* ix + iy where iy: denormal and ix, iy >= 0 */
/*
 * Double-precision magnitude addition @ix + @iy, where @iy is a denormal
 * and both arguments are sign-less bit patterns.
 *
 * When @ix is denormal too, the plain integer sum is returned.  Otherwise
 * @iy is aligned to @ix's exponent by a plain right shift (bits shifted
 * out are dropped) and a carry out of the mantissa bumps the exponent.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	long long e;
	long long shift;

	if (ix < 0x0010000000000000LL)
		return ix + iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	shift = e - 1;
	if (shift > 63)		/* iy is far too small to matter */
		return ix;
	iy >>= shift;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + iy;
	if (mant >= 0x0020000000000000LL) {
		mant >>= 1;
		e++;
	}

	return (e << 52) | (mant & 0x000fffffffffffffLL);
}
375
/*
 * Double-precision addition @hx + @hy on raw bit patterns, for the case
 * where at least one operand is a denormal.
 *
 * The operands are split into sign and magnitude.  Equal signs add the
 * magnitudes, differing signs subtract them; in both cases the denormal
 * magnitude is passed as the second argument of the helper.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* Opposite signs: subtract the magnitudes. */
		if (iy < 0x0010000000000000LL) {
			/*
			 * denormal_subd1() returns a negative value when
			 * both magnitudes are denormal and |hx| < |hy|.
			 * Keep the result signed so that case is actually
			 * detected; comparing an unsigned copy against 0
			 * never is.
			 */
			long long diff = denormal_subd1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x8000000000000000LL;
			}
			ix = diff;
		} else {
			/* hy is the normalised one: |hy| - |hx|, hy's sign. */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* Same sign: add the magnitudes, sign is unchanged. */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
407
408 /**
409 * denormal_to_double - Given denormalized float number,
410 * store double float
411 *
412 * @fpu: Pointer to sh_fpu_hard structure
413 * @n: Index to FP register
414 */
415 static void
416 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
417 {
418 unsigned long du, dl;
419 unsigned long x = fpu->fpul;
420 int exp = 1023 - 126;
421
422 if (x != 0 && (x & 0x7f800000) == 0) {
423 du = (x & 0x80000000);
424 while ((x & 0x00800000) == 0) {
425 x <<= 1;
426 exp--;
427 }
428 x &= 0x007fffff;
429 du |= (exp << 20) | (x >> 3);
430 dl = x << 29;
431
432 fpu->fp_regs[n] = du;
433 fpu->fp_regs[n+1] = dl;
434 }
435 }
436
437 /**
438 * ieee_fpe_handler - Handle denormalized number exception
439 *
440 * @regs: Pointer to register structure
441 *
442 * Returns 1 when it's handled (should not cause exception).
443 */
444 static int
445 ieee_fpe_handler (struct pt_regs *regs)
446 {
447 unsigned short insn = *(unsigned short *) regs->pc;
448 unsigned short finsn;
449 unsigned long nextpc;
450 int nib[4] = {
451 (insn >> 12) & 0xf,
452 (insn >> 8) & 0xf,
453 (insn >> 4) & 0xf,
454 insn & 0xf};
455
456 if (nib[0] == 0xb ||
457 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
458 regs->pr = regs->pc + 4;
459 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
460 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
461 finsn = *(unsigned short *) (regs->pc + 2);
462 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
463 if (regs->sr & 1)
464 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
465 else
466 nextpc = regs->pc + 4;
467 finsn = *(unsigned short *) (regs->pc + 2);
468 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
469 if (regs->sr & 1)
470 nextpc = regs->pc + 4;
471 else
472 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
473 finsn = *(unsigned short *) (regs->pc + 2);
474 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
475 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
476 nextpc = regs->regs[nib[1]];
477 finsn = *(unsigned short *) (regs->pc + 2);
478 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
479 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
480 nextpc = regs->pc + 4 + regs->regs[nib[1]];
481 finsn = *(unsigned short *) (regs->pc + 2);
482 } else if (insn == 0x000b) { /* rts */
483 nextpc = regs->pr;
484 finsn = *(unsigned short *) (regs->pc + 2);
485 } else {
486 nextpc = regs->pc + 2;
487 finsn = insn;
488 }
489
490 #define FPSCR_FPU_ERROR (1 << 17)
491
492 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
493 struct task_struct *tsk = current;
494
495 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
496 /* FPU error */
497 denormal_to_double (&tsk->thread.fpu.hard,
498 (finsn >> 8) & 0xf);
499 } else
500 return 0;
501
502 regs->pc = nextpc;
503 return 1;
504 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
505 struct task_struct *tsk = current;
506 int fpscr;
507 int n, m, prec;
508 unsigned int hx, hy;
509
510 n = (finsn >> 8) & 0xf;
511 m = (finsn >> 4) & 0xf;
512 hx = tsk->thread.fpu.hard.fp_regs[n];
513 hy = tsk->thread.fpu.hard.fp_regs[m];
514 fpscr = tsk->thread.fpu.hard.fpscr;
515 prec = fpscr & (1 << 19);
516
517 if ((fpscr & FPSCR_FPU_ERROR)
518 && (prec && ((hx & 0x7fffffff) < 0x00100000
519 || (hy & 0x7fffffff) < 0x00100000))) {
520 long long llx, lly;
521
522 /* FPU error because of denormal */
523 llx = ((long long) hx << 32)
524 | tsk->thread.fpu.hard.fp_regs[n+1];
525 lly = ((long long) hy << 32)
526 | tsk->thread.fpu.hard.fp_regs[m+1];
527 if ((hx & 0x7fffffff) >= 0x00100000)
528 llx = denormal_muld(lly, llx);
529 else
530 llx = denormal_muld(llx, lly);
531 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
532 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
533 } else if ((fpscr & FPSCR_FPU_ERROR)
534 && (!prec && ((hx & 0x7fffffff) < 0x00800000
535 || (hy & 0x7fffffff) < 0x00800000))) {
536 /* FPU error because of denormal */
537 if ((hx & 0x7fffffff) >= 0x00800000)
538 hx = denormal_mulf(hy, hx);
539 else
540 hx = denormal_mulf(hx, hy);
541 tsk->thread.fpu.hard.fp_regs[n] = hx;
542 } else
543 return 0;
544
545 regs->pc = nextpc;
546 return 1;
547 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
548 struct task_struct *tsk = current;
549 int fpscr;
550 int n, m, prec;
551 unsigned int hx, hy;
552
553 n = (finsn >> 8) & 0xf;
554 m = (finsn >> 4) & 0xf;
555 hx = tsk->thread.fpu.hard.fp_regs[n];
556 hy = tsk->thread.fpu.hard.fp_regs[m];
557 fpscr = tsk->thread.fpu.hard.fpscr;
558 prec = fpscr & (1 << 19);
559
560 if ((fpscr & FPSCR_FPU_ERROR)
561 && (prec && ((hx & 0x7fffffff) < 0x00100000
562 || (hy & 0x7fffffff) < 0x00100000))) {
563 long long llx, lly;
564
565 /* FPU error because of denormal */
566 llx = ((long long) hx << 32)
567 | tsk->thread.fpu.hard.fp_regs[n+1];
568 lly = ((long long) hy << 32)
569 | tsk->thread.fpu.hard.fp_regs[m+1];
570 if ((finsn & 0xf00f) == 0xf000)
571 llx = denormal_addd(llx, lly);
572 else
573 llx = denormal_addd(llx, lly ^ (1LL << 63));
574 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
575 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
576 } else if ((fpscr & FPSCR_FPU_ERROR)
577 && (!prec && ((hx & 0x7fffffff) < 0x00800000
578 || (hy & 0x7fffffff) < 0x00800000))) {
579 /* FPU error because of denormal */
580 if ((finsn & 0xf00f) == 0xf000)
581 hx = denormal_addf(hx, hy);
582 else
583 hx = denormal_addf(hx, hy ^ 0x80000000);
584 tsk->thread.fpu.hard.fp_regs[n] = hx;
585 } else
586 return 0;
587
588 regs->pc = nextpc;
589 return 1;
590 }
591
592 return 0;
593 }
594
595 BUILD_TRAP_HANDLER(fpu_error)
596 {
597 struct task_struct *tsk = current;
598 TRAP_HANDLER_DECL;
599
600 save_fpu(tsk, regs);
601 if (ieee_fpe_handler(regs)) {
602 tsk->thread.fpu.hard.fpscr &=
603 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
604 grab_fpu(regs);
605 restore_fpu(tsk);
606 set_tsk_thread_flag(tsk, TIF_USEDFPU);
607 return;
608 }
609
610 force_sig(SIGFPE, tsk);
611 }
612
613 BUILD_TRAP_HANDLER(fpu_state_restore)
614 {
615 struct task_struct *tsk = current;
616 TRAP_HANDLER_DECL;
617
618 grab_fpu(regs);
619 if (!user_mode(regs)) {
620 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
621 return;
622 }
623
624 if (used_math()) {
625 /* Using the FPU again. */
626 restore_fpu(tsk);
627 } else {
628 /* First time FPU user. */
629 fpu_init();
630 set_used_math();
631 }
632 set_tsk_thread_flag(tsk, TIF_USEDFPU);
633 }
This page took 0.079721 seconds and 5 git commands to generate.