Merge commit 'gcl/next' into next
[deliverable/linux.git] / arch / powerpc / lib / copyuser_64.S
1 /*
2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9 #include <asm/processor.h>
10 #include <asm/ppc_asm.h>
11
12 .align 7
/*
 * __copy_tofrom_user(to [r3], from [r4], n [r5])
 *
 * Copies n bytes and returns (in r3) 0 on success, or the number of
 * bytes NOT copied if a load or store faults.  The incoming r3/r4/r5
 * are stashed just below the stack pointer so the exception fixup code
 * further down can recompute the residue.
 *
 * Numeric labels 20..43 mark faultable loads and 70..92 faultable
 * stores; each one has a matching fixup entry in the __ex_table
 * section at the end of this function.
 */
13 _GLOBAL(__copy_tofrom_user)
14 /* first check for a whole page copy on a page boundary */
15 cmpldi cr1,r5,16 /* cr1: short copy (< 16 bytes)? */
16 cmpdi cr6,r5,4096 /* cr6: exactly one 4K page? */
17 or r0,r3,r4
18 neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
19 andi. r0,r0,4095 /* cr0 EQ <=> src and dest both page-aligned */
20 std r3,-24(r1) /* save args for the exception handlers */
21 crand cr0*4+2,cr0*4+2,cr6*4+2 /* EQ = page-aligned && n == 4096 */
22 std r4,-16(r1)
23 std r5,-8(r1)
24 dcbt 0,r4 /* touch the first source cache line */
25 beq .Lcopy_page_4K /* aligned whole-page copy: take fast path */
26 andi. r6,r6,7 /* r6 = bytes needed to 8-align the dest */
27 PPC_MTOCRF 0x01,r5 /* low 4 bits of n -> cr7 (drive tail copy) */
28 blt cr1,.Lshort_copy
29 /* Below we want to nop out the bne if we're on a CPU that has the
30 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
31 * cleared.
32 * At the time of writing the only CPU that has this combination of bits
33 * set is Power6.
34 */
35 BEGIN_FTR_SECTION
36 nop
37 FTR_SECTION_ELSE
38 bne .Ldst_unaligned
39 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
40 CPU_FTR_UNALIGNED_LD_STD)
/*
 * Destination is 8-byte aligned (or the CPU handles unaligned
 * ld/std natively).  Main loop below moves 16 bytes per iteration
 * using a two-register (r8/r9) software pipeline.
 */
41 .Ldst_aligned:
42 addi r3,r3,-16 /* pre-bias dest for the stdu-based loop */
43 BEGIN_FTR_SECTION
44 andi. r0,r4,7
45 bne .Lsrc_unaligned /* src misaligned: shift/merge path below */
46 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
47 srdi r7,r5,4 /* r7 = number of 16-byte chunks */
48 20: ld r9,0(r4)
49 addi r4,r4,-8
50 mtctr r7
51 andi. r5,r5,7 /* r5 = leftover bytes (mod 8) */
52 bf cr7*4+0,22f /* no odd 8-byte doubleword in n? */
53 addi r3,r3,8
54 addi r4,r4,8
55 mr r8,r9
56 blt cr1,72f /* fewer than 16 bytes: just the one std */
/* pipelined 16-bytes-per-iteration copy loop */
57 21: ld r9,8(r4)
58 70: std r8,8(r3)
59 22: ldu r8,16(r4)
60 71: stdu r9,16(r3)
61 bdnz 21b
62 72: std r8,8(r3)
63 beq+ 3f /* no tail bytes: done, return 0 */
64 addi r3,r3,16
65 23: ld r9,8(r4) /* fetch doubleword holding the tail bytes */
/*
 * Store the final 1-7 bytes held in the top of r9, picking word /
 * halfword / byte pieces off according to the low bits of n in cr7.
 */
66 .Ldo_tail:
67 bf cr7*4+1,1f
68 rotldi r9,r9,32
69 73: stw r9,0(r3)
70 addi r3,r3,4
71 1: bf cr7*4+2,2f
72 rotldi r9,r9,16
73 74: sth r9,0(r3)
74 addi r3,r3,2
75 2: bf cr7*4+3,3f
76 rotldi r9,r9,8
77 75: stb r9,0(r3)
78 3: li r3,0 /* success: 0 bytes left uncopied */
79 blr
80
/*
 * Source not 8-byte aligned (dest is): read aligned doublewords and
 * merge adjacent pairs with shifts.  r10 = 8 * (src misalignment)
 * = left-shift count, r11 = 64 - r10 = right-shift count.  cr6
 * selects between the short (24:) and long (28:) pipeline preambles.
 */
81 .Lsrc_unaligned:
82 srdi r6,r5,3
83 addi r5,r5,-16
84 subf r4,r0,r4 /* round src down to 8-byte boundary */
85 srdi r7,r5,4
86 sldi r10,r0,3
87 cmpldi cr6,r6,3
88 andi. r5,r5,7
89 mtctr r7
90 subfic r11,r10,64
91 add r5,r5,r0
92 bt cr7*4+0,28f
93
94 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
95 25: ld r0,8(r4)
96 sld r6,r9,r10
97 26: ldu r9,16(r4)
98 srd r7,r0,r11
99 sld r8,r0,r10
100 or r7,r7,r6
101 blt cr6,79f
102 27: ld r0,8(r4)
103 b 2f
104
105 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
106 29: ldu r9,8(r4)
107 sld r8,r0,r10
108 addi r3,r3,-8
109 blt cr6,5f
110 30: ld r0,8(r4)
111 srd r12,r9,r11
112 sld r6,r9,r10
113 31: ldu r9,16(r4)
114 or r12,r8,r12
115 srd r7,r0,r11
116 sld r8,r0,r10
117 addi r3,r3,16
118 beq cr6,78f
119
/* steady-state shift/merge loop: 16 bytes per iteration */
120 1: or r7,r7,r6
121 32: ld r0,8(r4)
122 76: std r12,8(r3)
123 2: srd r12,r9,r11
124 sld r6,r9,r10
125 33: ldu r9,16(r4)
126 or r12,r8,r12
127 77: stdu r7,16(r3)
128 srd r7,r0,r11
129 sld r8,r0,r10
130 bdnz 1b
131
/* drain the pipeline: flush the merged doublewords still in flight */
132 78: std r12,8(r3)
133 or r7,r7,r6
134 79: std r7,16(r3)
135 5: srd r12,r9,r11
136 or r12,r8,r12
137 80: std r12,24(r3)
138 bne 6f /* tail bytes remain? */
139 li r3,0
140 blr
141 6: cmpwi cr1,r5,8
142 addi r3,r3,32
143 sld r9,r9,r10
144 ble cr1,.Ldo_tail /* tail fits in what r9 already holds */
145 34: ld r0,8(r4) /* need one more src doubleword for the tail */
146 srd r7,r0,r11
147 or r9,r7,r9
148 b .Ldo_tail
149
/*
 * Destination not 8-byte aligned: copy up to 7 bytes (byte, halfword,
 * word as needed, per cr7) to align the dest, tracking progress in r7,
 * then rejoin the aligned path.
 */
150 .Ldst_unaligned:
151 PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
152 subf r5,r6,r5 /* n -= alignment bytes */
153 li r7,0 /* r7 = bytes copied so far (index) */
154 cmpldi cr1,r5,16
155 bf cr7*4+3,1f
156 35: lbz r0,0(r4)
157 81: stb r0,0(r3)
158 addi r7,r7,1
159 1: bf cr7*4+2,2f
160 36: lhzx r0,r7,r4
161 82: sthx r0,r7,r3
162 addi r7,r7,2
163 2: bf cr7*4+1,3f
164 37: lwzx r0,r7,r4
165 83: stwx r0,r7,r3
166 3: PPC_MTOCRF 0x01,r5 /* refresh cr7 with low bits of remaining n */
167 add r4,r6,r4
168 add r3,r6,r3
169 b .Ldst_aligned
170 
/*
 * Copies of fewer than 16 bytes: cr7 holds the low 4 bits of n;
 * move two words, a word, a halfword and/or a byte as its bits dictate.
 */
171 .Lshort_copy:
172 bf cr7*4+0,1f
173 38: lwz r0,0(r4)
174 39: lwz r9,4(r4)
175 addi r4,r4,8
176 84: stw r0,0(r3)
177 85: stw r9,4(r3)
178 addi r3,r3,8
179 1: bf cr7*4+1,2f
180 40: lwz r0,0(r4)
181 addi r4,r4,4
182 86: stw r0,0(r3)
183 addi r3,r3,4
184 2: bf cr7*4+2,3f
185 41: lhz r0,0(r4)
186 addi r4,r4,2
187 87: sth r0,0(r3)
188 addi r3,r3,2
189 3: bf cr7*4+3,4f
190 42: lbz r0,0(r4)
191 88: stb r0,0(r3)
192 4: li r3,0 /* success */
193 blr
194
195 /*
196 * exception handlers follow
197 * we have to return the number of bytes not copied
198 * for an exception on a load, we set the rest of the destination to 0
199 */
200 
/*
 * Load-fault fixups.  Label 1NN handles a fault at numeric label NN
 * (see the __ex_table below).  The groups fall through and accumulate
 * addi adjustments, so that by the time control reaches 1: below,
 * r3 points at the first destination byte not yet stored by the
 * faulting copy path.
 */
/* faults in the indexed loads at 36/37: r7 = bytes already copied */
201 136:
202 137:
203 add r3,r3,r7
204 b 1f
205 130:
206 131:
207 addi r3,r3,8
208 120:
209 122:
210 124:
211 125:
212 126:
213 127:
214 128:
215 129:
216 133:
217 addi r3,r3,8
218 121:
219 132:
220 addi r3,r3,8
221 123:
222 134:
223 135:
224 138:
225 139:
226 140:
227 141:
228 142:
229 
230 /*
231 * here we have had a fault on a load and r3 points to the first
232 * unmodified byte of the destination
233 */
234 1: ld r6,-24(r1) /* reload original dest */
235 ld r4,-16(r1) /* reload original src */
236 ld r5,-8(r1) /* reload original n */
237 subf r6,r6,r3 /* r6 = bytes successfully copied */
238 add r4,r4,r6 /* advance src to first uncopied byte */
239 subf r5,r6,r5 /* #bytes left to go */
240 
241 /*
242 * first see if we can copy any more bytes before hitting another exception
243 */
244 mtctr r5
245 43: lbz r0,0(r4) /* byte-at-a-time retry; faults go to 143 */
246 addi r4,r4,1
247 89: stb r0,0(r3)
248 addi r3,r3,1
249 bdnz 43b
250 li r3,0 /* huh? all copied successfully this time? */
251 blr
252 
253 /*
254 * here we have trapped again, need to clear ctr bytes starting at r3
255 */
256 143: mfctr r5 /* r5 = bytes remaining = return value */
257 li r0,0
258 mr r4,r3
259 mr r3,r5 /* return the number of bytes not copied */
/* zero tail: bytes until 8-aligned, then doublewords, then bytes */
260 1: andi. r9,r4,7
261 beq 3f
262 90: stb r0,0(r4)
263 addic. r5,r5,-1
264 addi r4,r4,1
265 bne 1b
266 blr
267 3: cmpldi cr1,r5,8
268 srdi r9,r5,3 /* r9 = whole doublewords to clear */
269 andi. r5,r5,7 /* r5 = trailing bytes to clear */
270 blt cr1,93f
271 mtctr r9
272 91: std r0,0(r4)
273 addi r4,r4,8
274 bdnz 91b
275 93: beqlr
276 mtctr r5
277 92: stb r0,0(r4)
278 addi r4,r4,1
279 bdnz 92b
280 blr
281 
282 /*
283 * exception handlers for stores: we just need to work
284 * out how many bytes weren't copied
285 */
/* same fall-through scheme: adjust r3 to the first unstored byte */
/* faults in the indexed stores at 82/83: r7 = bytes already copied */
286 182:
287 183:
288 add r3,r3,r7
289 b 1f
290 180:
291 addi r3,r3,8
292 171:
293 177:
294 addi r3,r3,8
295 170:
296 172:
297 176:
298 178:
299 addi r3,r3,4
300 185:
301 addi r3,r3,4
302 173:
303 174:
304 175:
305 179:
306 181:
307 184:
308 186:
309 187:
310 188:
311 189:
312 1:
313 ld r6,-24(r1) /* original dest */
314 ld r5,-8(r1) /* original n */
315 add r6,r6,r5 /* r6 = one past end of dest buffer */
316 subf r3,r3,r6 /* #bytes not copied */
/* 190/191/192 catch faults in the zeroing loop itself (90/91/92) */
317 190:
318 191:
319 192:
320 blr /* #bytes not copied in r3 */
321
/*
 * Exception fixup table: (faulting instruction, handler) address
 * pairs.  The 'b' suffixes refer back to the numeric labels in the
 * copy code and handlers above; entry order must track the code.
 */
322 .section __ex_table,"a"
323 .align 3
324 .llong 20b,120b
325 .llong 21b,121b
326 .llong 70b,170b
327 .llong 22b,122b
328 .llong 71b,171b
329 .llong 72b,172b
330 .llong 23b,123b
331 .llong 73b,173b
332 .llong 74b,174b
333 .llong 75b,175b
334 .llong 24b,124b
335 .llong 25b,125b
336 .llong 26b,126b
337 .llong 27b,127b
338 .llong 28b,128b
339 .llong 29b,129b
340 .llong 30b,130b
341 .llong 31b,131b
342 .llong 32b,132b
343 .llong 76b,176b
344 .llong 33b,133b
345 .llong 77b,177b
346 .llong 78b,178b
347 .llong 79b,179b
348 .llong 80b,180b
349 .llong 34b,134b
350 .llong 35b,135b
351 .llong 81b,181b
352 .llong 36b,136b
353 .llong 82b,182b
354 .llong 37b,137b
355 .llong 83b,183b
356 .llong 38b,138b
357 .llong 39b,139b
358 .llong 84b,184b
359 .llong 85b,185b
360 .llong 40b,140b
361 .llong 86b,186b
362 .llong 41b,141b
363 .llong 87b,187b
364 .llong 42b,142b
365 .llong 88b,188b
366 .llong 43b,143b
367 .llong 89b,189b
368 .llong 90b,190b
369 .llong 91b,191b
370 .llong 92b,192b
371 
372 .text
373
374 /*
375 * Routine to copy a whole page of data, optimized for POWER4.
376 * On POWER4 it is more than 50% faster than the simple loop
377 * above (following the .Ldst_aligned label) but it runs slightly
378 * slower on POWER3.
379 */
380 .Lcopy_page_4K:
/*
 * Save non-volatile r20-r31 below the stack pointer so they can be
 * used as copy registers.  (NOTE(review): relies on this negative-
 * offset area being safe to use in kernel context — matches the
 * -8..-24 arg saves above.)
 */
381 std r31,-32(1)
382 std r30,-40(1)
383 std r29,-48(1)
384 std r28,-56(1)
385 std r27,-64(1)
386 std r26,-72(1)
387 std r25,-80(1)
388 std r24,-88(1)
389 std r23,-96(1)
390 std r22,-104(1)
391 std r21,-112(1)
392 std r20,-120(1)
393 li r5,4096/32 - 1 /* r5 counts 32-byte chunks remaining */
394 addi r3,r3,-8 /* pre-bias dest for stdu addressing */
395 li r0,5 /* inner loop iterations per outer pass */
/*
 * Unrolled pipelined copy: loads/stores are interleaved across six
 * streams 128 bytes apart (offsets 0,128,256,384,512,640), moving
 * 24 bytes per stream per inner iteration.  All loads are labelled
 * 20..91 and mapped to handler 100 by the table below.
 */
396 0: addi r5,r5,-24
397 mtctr r0
398 20: ld r22,640(4)
399 21: ld r21,512(4)
400 22: ld r20,384(4)
401 23: ld r11,256(4)
402 24: ld r9,128(4)
403 25: ld r7,0(4)
404 26: ld r25,648(4)
405 27: ld r24,520(4)
406 28: ld r23,392(4)
407 29: ld r10,264(4)
408 30: ld r8,136(4)
409 31: ldu r6,8(4)
410 cmpwi r5,24 /* set up the bge 0b decision below */
411 1:
412 32: std r22,648(3)
413 33: std r21,520(3)
414 34: std r20,392(3)
415 35: std r11,264(3)
416 36: std r9,136(3)
417 37: std r7,8(3)
418 38: ld r28,648(4)
419 39: ld r27,520(4)
420 40: ld r26,392(4)
421 41: ld r31,264(4)
422 42: ld r30,136(4)
423 43: ld r29,8(4)
424 44: std r25,656(3)
425 45: std r24,528(3)
426 46: std r23,400(3)
427 47: std r10,272(3)
428 48: std r8,144(3)
429 49: std r6,16(3)
430 50: ld r22,656(4)
431 51: ld r21,528(4)
432 52: ld r20,400(4)
433 53: ld r11,272(4)
434 54: ld r9,144(4)
435 55: ld r7,16(4)
436 56: std r28,664(3)
437 57: std r27,536(3)
438 58: std r26,408(3)
439 59: std r31,280(3)
440 60: std r30,152(3)
441 61: stdu r29,24(3)
442 62: ld r25,664(4)
443 63: ld r24,536(4)
444 64: ld r23,408(4)
445 65: ld r10,280(4)
446 66: ld r8,152(4)
447 67: ldu r6,24(4)
448 bdnz 1b
/* drain the in-flight stream data, advance past the 640-byte block */
449 68: std r22,648(3)
450 69: std r21,520(3)
451 70: std r20,392(3)
452 71: std r11,264(3)
453 72: std r9,136(3)
454 73: std r7,8(3)
455 74: addi r4,r4,640
456 75: addi r3,r3,648
457 bge 0b
/* final partial block: simple pipelined 32-bytes-per-iteration loop */
458 mtctr r5
459 76: ld r7,0(4)
460 77: ld r8,8(4)
461 78: ldu r9,16(4)
462 3:
463 79: ld r10,8(4)
464 80: std r7,8(3)
465 81: ld r7,16(4)
466 82: std r8,16(3)
467 83: ld r8,24(4)
468 84: std r9,24(3)
469 85: ldu r9,32(4)
470 86: stdu r10,32(3)
471 bdnz 3b
472 4:
473 87: ld r10,8(4)
474 88: std r7,8(3)
475 89: std r8,16(3)
476 90: std r9,24(3)
477 91: std r10,32(3)
/* restore the non-volatile registers and return success */
478 9: ld r20,-120(1)
479 ld r21,-112(1)
480 ld r22,-104(1)
481 ld r23,-96(1)
482 ld r24,-88(1)
483 ld r25,-80(1)
484 ld r26,-72(1)
485 ld r27,-64(1)
486 ld r28,-56(1)
487 ld r29,-48(1)
488 ld r30,-40(1)
489 ld r31,-32(1)
490 li r3,0
491 blr
492 
493 /*
494 * on an exception, reset to the beginning and jump back into the
495 * standard __copy_tofrom_user
496 */
/*
 * Single fixup for every fault site in the page-copy path: restore
 * the saved registers, reload the original dest/src from the stack
 * slots saved at entry, and redo the whole page via the generic
 * .Ldst_aligned path, which has fine-grained per-instruction fixups.
 */
497 100: ld r20,-120(1)
498 ld r21,-112(1)
499 ld r22,-104(1)
500 ld r23,-96(1)
501 ld r24,-88(1)
502 ld r25,-80(1)
503 ld r26,-72(1)
504 ld r27,-64(1)
505 ld r28,-56(1)
506 ld r29,-48(1)
507 ld r30,-40(1)
508 ld r31,-32(1)
509 ld r3,-24(r1)
510 ld r4,-16(r1)
511 li r5,4096
512 b .Ldst_aligned
513
/*
 * Exception fixup table for the page-copy fast path: every faultable
 * instruction (labels 20..91 above) funnels to the single restart
 * handler at label 100.
 */
514 .section __ex_table,"a"
515 .align 3
516 .llong 20b,100b
517 .llong 21b,100b
518 .llong 22b,100b
519 .llong 23b,100b
520 .llong 24b,100b
521 .llong 25b,100b
522 .llong 26b,100b
523 .llong 27b,100b
524 .llong 28b,100b
525 .llong 29b,100b
526 .llong 30b,100b
527 .llong 31b,100b
528 .llong 32b,100b
529 .llong 33b,100b
530 .llong 34b,100b
531 .llong 35b,100b
532 .llong 36b,100b
533 .llong 37b,100b
534 .llong 38b,100b
535 .llong 39b,100b
536 .llong 40b,100b
537 .llong 41b,100b
538 .llong 42b,100b
539 .llong 43b,100b
540 .llong 44b,100b
541 .llong 45b,100b
542 .llong 46b,100b
543 .llong 47b,100b
544 .llong 48b,100b
545 .llong 49b,100b
546 .llong 50b,100b
547 .llong 51b,100b
548 .llong 52b,100b
549 .llong 53b,100b
550 .llong 54b,100b
551 .llong 55b,100b
552 .llong 56b,100b
553 .llong 57b,100b
554 .llong 58b,100b
555 .llong 59b,100b
556 .llong 60b,100b
557 .llong 61b,100b
558 .llong 62b,100b
559 .llong 63b,100b
560 .llong 64b,100b
561 .llong 65b,100b
562 .llong 66b,100b
563 .llong 67b,100b
564 .llong 68b,100b
565 .llong 69b,100b
566 .llong 70b,100b
567 .llong 71b,100b
568 .llong 72b,100b
569 .llong 73b,100b
570 .llong 74b,100b
571 .llong 75b,100b
572 .llong 76b,100b
573 .llong 77b,100b
574 .llong 78b,100b
575 .llong 79b,100b
576 .llong 80b,100b
577 .llong 81b,100b
578 .llong 82b,100b
579 .llong 83b,100b
580 .llong 84b,100b
581 .llong 85b,100b
582 .llong 86b,100b
583 .llong 87b,100b
584 .llong 88b,100b
585 .llong 89b,100b
586 .llong 90b,100b
587 .llong 91b,100b
This page took 0.055676 seconds and 5 git commands to generate.