/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

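/*
 * __copy_tofrom_user(to, from, n)
 *
 * Entry:	r3 = destination, r4 = source, r5 = byte count
 * Exit:	r3 = number of bytes NOT copied (0 on complete success)
 */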
	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
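	/*
	 * cr0.eq was set by the andi. above (both pointers page
	 * aligned) and ANDed with cr6.eq (count == 4096), so the
	 * branch below is taken only for a page-aligned whole-page copy.
	 */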
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF	0x01,r5
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned
.Ldst_aligned:
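	/*
	 * Destination is now 8-byte aligned.  If the source is aligned
	 * too, fall through to the doubleword loop (16 bytes per
	 * iteration, loads running one step ahead of the stores);
	 * otherwise use the shift-and-merge loop at .Lsrc_unaligned.
	 */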
	andi.	r0,r4,7
	addi	r3,r3,-16
	bne	.Lsrc_unaligned
	srdi	r7,r5,4
20:	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,22f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f
	addi	r3,r3,16
23:	ld	r9,8(r4)
.Ldo_tail:
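	/*
	 * Store the last 1-7 bytes, held in the high-order end of r9;
	 * cr7 bits select word, halfword and byte stores in turn.
	 */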
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
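	/*
	 * Source is not doubleword aligned: fetch aligned doublewords
	 * and merge them with sld/srd.  r10 = 8 * (src & 7) is the
	 * left-shift count and r11 = 64 - r10 the right-shift count,
	 * so each destination doubleword is assembled from two
	 * consecutive source doublewords.
	 */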
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail

.Ldst_unaligned:
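	/*
	 * Copy the 1-7 bytes (count in r6) needed to bring the
	 * destination up to an 8-byte boundary, then rejoin the
	 * aligned path.
	 */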
	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
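	/*
	 * Copy of fewer than 16 bytes: cr7 holds the low four bits
	 * of the count (8/4/2/1).
	 */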
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

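/*
 * Each numeric label below is a fixup entry point; the __ex_table
 * section at the end pairs every faulting load above with its fixup
 * (load label N is handled at label N+100).  The addi chains that
 * later entries fall through adjust r3 so that it points at the
 * first destination byte not yet stored.
 */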
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
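/*
 * 143 is the fixup taken if the load in the retry loop faults again:
 * CTR still holds the number of bytes left, which becomes the return
 * value, and the corresponding destination bytes are zeroed, byte by
 * byte up to an 8-byte boundary, then a doubleword at a time, then
 * any tail bytes.
 */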
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

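/*
 * The exception table pairs the address of each faulting load/store
 * above (numeric label, 'b' = backward reference) with the address
 * of its fixup handler.
 */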
	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
.Lcopy_page_4K:
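	/*
	 * r20-r31 are saved in the red zone below the stack pointer.
	 * The main loop keeps six streams in flight, one per 128-byte
	 * cache line (source offsets 0, 128, ..., 640), interleaving
	 * loads and stores to hide memory latency on POWER4.
	 */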
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b