arch/powerpc/lib/copyuser_64.S
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

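/*
 * __copy_tofrom_user(to, from, size): r3 = destination, r4 = source,
 * r5 = byte count.  Returns 0 on success, or the number of bytes not
 * copied if a load or store faults.  C callers reach this through the
 * copy_to_user()/copy_from_user() wrappers in uaccess.h.
 */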
	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF	0x01,r5
	blt	cr1,.Lshort_copy
	/* Below we want to nop out the bne if we're on a CPU that has the
	 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
	 * cleared.
	 * At the time of writing the only CPU that has this combination of bits
	 * set is Power6.
	 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail	/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
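	/* Copy the final 0-15 bytes; cr7 holds the low four bits of the
	 * original count, one bit each for 8-, 4-, 2- and 1-byte chunks. */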
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

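/*
 * Source is not 8-byte aligned: load aligned doublewords and merge
 * each adjacent pair with sld/srd (shifting by 8 * the misalignment in
 * bits) so that the stores to the destination stay aligned.
 */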
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
94:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
95:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
96:	stb	r9,0(r3)
3:	li	r3,0
	blr

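/*
 * Destination is not 8-byte aligned: copy the 1-7 bytes needed to
 * reach an 8-byte boundary (count in r6, mirrored into cr7), then
 * rejoin the aligned path.
 */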
.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

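/* Copies of less than 16 bytes: cr7 holds the low four bits of the
 * count (8/4/2/1-byte chunks). */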
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

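/* Each numeric fixup label below is its faulting label plus 100 (see
 * the __ex_table entries further down).  Every entry point leaves r3
 * pointing at the first unmodified destination byte before falling
 * through to the common code at 1: below. */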
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

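/* Exception table: each entry pairs the address of an instruction that
 * may fault with the fixup code to run when it does. */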
	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	220b,320b
	.llong	21b,121b
	.llong	221b,321b
	.llong	70b,170b
	.llong	270b,370b
	.llong	22b,122b
	.llong	222b,322b
	.llong	71b,171b
	.llong	271b,371b
	.llong	72b,172b
	.llong	272b,372b
	.llong	244b,344b
	.llong	245b,345b
	.llong	23b,123b
	.llong	73b,173b
	.llong	44b,144b
	.llong	74b,174b
	.llong	45b,145b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	94b,194b
	.llong	95b,195b
	.llong	96b,196b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
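/* The main loop below reads six streams spaced 128 bytes (one POWER4
 * cache line) apart, keeping several cache lines in flight at once.
 * r20-r31 are saved below the stack pointer for the duration. */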
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

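/* Any fault in the page copy restores the saved registers, reloads the
 * original arguments and retries through the standard path above, which
 * handles faults precisely. */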
	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b