powerpc: Don't disable MSR bits in do_load_up_transact_*() functions
arch/powerpc/kernel/vector.S

#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
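/*
 * In rough C-like terms this does (illustrative sketch only; field names
 * as implied by the asm-offsets used below):
 *	msr = mfmsr(); mtmsr(msr | MSR_VEC); isync;
 *	thread->used_vr = 1;
 *	mtvscr(thread->transact_vr.vscr);
 *	reload v0-v31 from &thread->transact_vr;
 */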
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
	lvx	v0,r10,r3
	mtvscr	v0
	addi	r10,r3,THREAD_TRANSACT_VRSTATE
	REST_32VRS(0,r4,r10)

	blr
#endif

/*
 * Enable use of VMX/Altivec for the caller.
 */
_GLOBAL(vec_enable)
	mfmsr	r3
	oris	r3,r3,MSR_VEC@h
	MTMSRD(r3)
	isync
	blr

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
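
/*
 * Typical (illustrative) use of the three helpers above, assuming the
 * caller runs with preemption disabled:
 *	vec_enable();				enable MSR_VEC for the kernel
 *	load_vr_state(&tsk->thread.vr_state);	VSCR and v0-v31 from memory
 *	...
 *	store_vr_state(&tsk->thread.vr_state);	v0-v31 and VSCR back to memory
 */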

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
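/*
 * Outline (illustrative): enable MSR_VEC for the kernel; on UP, save the
 * previous owner's VSCR/VRs and clear MSR_VEC in its saved MSR; set
 * VRSAVE to all 1s if it is zero; set MSR_VEC in the MSR that will be
 * restored on exception return; mark the thread as having used VMX and
 * reload its VSCR and v0-v31; on UP, record current as
 * last_task_used_altivec.
 */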
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code.  Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	addi	r6,r4,THREAD_VRSTATE
	SAVE_32VRS(0,r5,r6)
	mfvscr	v0
	li	r10,VRSTATE_VSCR
	stvx	v0,r10,r6
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

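/*
 * giveup_altivec_notask()
 * Enable VMX in the kernel's MSR if it is not already enabled.
 * Unlike giveup_altivec() below, this saves no task state.
 */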
_GLOBAL(giveup_altivec_notask)
	mfmsr	r3
	andis.	r4,r3,MSR_VEC@h
	bnelr				/* Already enabled? */
	oris	r3,r3,MSR_VEC@h
	SYNC
	MTMSRD(r3)			/* enable use of VMX now */
	isync
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
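/*
 * Rough flow (illustrative): enable MSR_VEC for the kernel; if tsk is
 * NULL, return; otherwise save VSCR and v0-v31 to the area pointed to by
 * thread.vr_save_area if that is set, else to thread.vr_state, clear
 * MSR_VEC (and MSR_VSX where the CPU has it) in the task's saved MSR
 * if it has pt_regs, and on UP clear last_task_used_altivec.
 */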
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX (and VSX) for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
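/*
 * Outline (illustrative): call load_up_fpu and load_up_altivec for any
 * state that is not already live (the VSX register file is the union of
 * the FP and VMX registers), on UP clear MSR_VSX in the previous owner's
 * saved MSR, mark the thread as having used VSX, set MSR_VSX in the MSR
 * restored on exception return, and on UP record current as
 * last_task_used_vsx.
 */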
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
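/*
 * Rough flow (illustrative): enable MSR_VSX for the kernel, and if tsk
 * is non-NULL clear MSR_VSX in its saved MSR if it has pt_regs (the
 * register contents themselves are not saved here, as noted above); on
 * UP also clear last_task_used_vsx.
 */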
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
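/*
 * Each routine below operates on vectors of four single-precision
 * values; r3 points to the 16-byte destination and r4 (plus r5/r6 where
 * present) to the source operands.
 */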
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
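/*
 * Callers save LR in r12 ("mflr r12") before "bl fpenable" and finish
 * with "b fpdisable", which restores the FPSCR, fr0/fr1/fr31, the MSR,
 * LR and the stack frame set up here.
 */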
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
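/*
 * Each Newton-Raphson step refines r ~ 1/sqrt(s) as
 *	r' = r + 0.5 * r * (1 - s * r * r)
 * which is the fmuls/fnmsubs/fmadds sequence repeated twice below.
 */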
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable