powerpc: Add transactional memory unavailable exception handler
[deliverable/linux.git] / arch/powerpc/kernel/vector.S
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Wrapper to call load_up_altivec from C.
 * void do_load_up_altivec(struct pt_regs *regs);
 */
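/*
 * As an illustration only (the handler name below is hypothetical), a C
 * caller in the vector-unavailable exception path is assumed to pass the
 * exception frame of the interrupted context:
 *
 *	void altivec_unavailable_tm_handler(struct pt_regs *regs)
 *	{
 *		do_load_up_altivec(regs);
 *	}
 */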
_GLOBAL(do_load_up_altivec)
	mflr	r0
	std	r0, 16(r1)
	stdu	r1, -112(r1)

	subi	r6, r3, STACK_FRAME_OVERHEAD
	/* load_up_altivec expects r12=MSR, r13=PACA, and returns
	 * with r12 = new MSR.
	 */
	ld	r12,_MSR(r6)
	GET_PACA(r13)
	bl	load_up_altivec
	std	r12,_MSR(r6)

	ld	r0, 112+16(r1)
	addi	r1, r1, 112
	mtlr	r0
	blr

/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs. It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
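/*
 * Summary of the body below: MSR[VEC] is enabled temporarily so the vector
 * loads are legal, the thread is flagged as having used the VRs, the
 * transactional VSCR and all 32 transactional VRs are restored from the
 * thread_struct, and the original MSR is then put back.
 */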
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VSCR
	lvx	vr0,r10,r3
	mtvscr	vr0
	REST_32VRS_TRANSACT(0,r4,r3)

	/* Disable VEC again. */
	MTMSRD(r6)
	isync

	blr
#endif

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 */
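/*
 * On 64-bit this is entered with r12 holding the saved MSR image and r13
 * pointing at the PACA (see the do_load_up_altivec wrapper above); it
 * returns via blr with MSR_VEC set in r12.
 */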
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another. Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code. Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

_GLOBAL(giveup_altivec_notask)
	mfmsr	r3
	andis.	r4,r3,MSR_VEC@h
	bnelr				/* Already enabled? */
	oris	r3,r3,MSR_VEC@h
	SYNC
	MTMSRD(r3)			/* enable use of VMX now */
	isync
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
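/*
 * Note that the vector registers and VSCR are saved to the thread_struct
 * unconditionally once a task is supplied; only the clearing of MSR_VEC
 * (and MSR_VSX where the CPU has it) in the task's saved MSR image is
 * skipped when the task has no pt_regs.
 */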
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
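/*
 * VSX has no separate register file: the VSX registers are the combined FP
 * and VMX registers. That is why the code below simply calls into
 * load_up_fpu or load_up_altivec for whichever half the MSR image in r12
 * says is not yet loaded, and then only has to set MSR_VSX itself.
 */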
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
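/*
 * Accordingly, the only per-task state touched below is the MSR_VSX bit in
 * the previous owner's saved MSR image; the register contents themselves
 * are presumably written out by the separate FP and AltiVec giveup paths.
 */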
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
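/*
 * Each helper below performs, one 32-bit element at a time on the scalar
 * FPU, the arithmetic of the AltiVec floating-point instruction it is named
 * after (vaddfp, vsubfp, vmaddfp, vnmsubfp, vrefp, vrsqrtefp), looping over
 * the four single-precision lanes of its operands.
 */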
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif
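/*
 * LDCONST hides the 32-bit/64-bit difference in how the constants above are
 * reached: on 32-bit they live in .data and are loaded as single-precision
 * values with an lis/lfs pair, while on 64-bit they are TOC entries loaded
 * as double-precision values with a single lfd relative to r2.
 */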

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
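/*
 * Within the 64-byte frame that fpenable creates, offsets 8, 16 and 24 hold
 * the caller's fr31, fr1 and fr0, and offsets 32..56 are left free for the
 * routines below to stash fr2..fr5; fr31 carries the saved FPSCR image so
 * that fpdisable can restore it before returning via the link-register
 * value that each exported routine saved in r12.
 */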
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
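/*
 * In C terms (an illustrative model only, not how the kernel invokes this),
 * vaddfp computes, lane by lane over the four single-precision elements of
 * each operand:
 *
 *	void vaddfp_model(float d[4], const float a[4], const float b[4])
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)
 *			d[i] = a[i] + b[i];
 *	}
 *
 * vsubfp below is identical except that it computes a[i] - b[i].
 */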
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
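/*
 * Operand order follows the AltiVec instruction: with r3 = destination and
 * r4, r5, r6 = the A, B and C source vectors, each lane computes
 * a[i] * c[i] + b[i]. vnmsubfp further below produces the negation of the
 * analogous a[i] * c[i] - b[i].
 */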
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
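/*
 * In other words, each of the four destination lanes is fpone divided by
 * the corresponding source lane, computed with fdivs rather than with a
 * hardware estimate.
 */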
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
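/*
 * A C-level model of the per-lane computation below (illustrative only;
 * frsqrte_estimate() is a hypothetical stand-in for the hardware frsqrte
 * result):
 *
 *	float rsqrt_lane(float s)
 *	{
 *		float r = frsqrte_estimate(s);		// initial estimate
 *
 *		r = r + 0.5f * r * (1.0f - s * r * r);	// first N-R step
 *		r = r + 0.5f * r * (1.0f - s * r * r);	// second N-R step
 *		return r;
 *	}
 *
 * Each Newton-Raphson step roughly doubles the number of accurate bits in
 * the approximation of 1/sqrt(s).
 */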
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable