sparc: move exports to definitions
arch/sparc/lib/memcpy.S
/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#include <asm/export.h>
#define FUNC(x) \
	.globl	x; \
	.type	x,@function; \
	.align	4; \
x:

/* Both these macros have to start with exactly the same insn */
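/* (The word-copy loop below branches to "82f + 4", executing the first
 * ldd of MOVE_BIGCHUNK in the branch delay slot; that only works because
 * MOVE_BIGALIGNCHUNK begins with the identical instruction.)
 */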
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	std	%t0, [%dst - (offset) - 0x10]; \
	std	%t2, [%dst - (offset) - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];

/* Both these macros have to start with exactly the same insn */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - (offset) - 0x20], %t0; \
	ldd	[%src - (offset) - 0x18], %t2; \
	ldd	[%src - (offset) - 0x10], %t4; \
	ldd	[%src - (offset) - 0x08], %t6; \
	st	%t0, [%dst - (offset) - 0x20]; \
	st	%t1, [%dst - (offset) - 0x1c]; \
	st	%t2, [%dst - (offset) - 0x18]; \
	st	%t3, [%dst - (offset) - 0x14]; \
	st	%t4, [%dst - (offset) - 0x10]; \
	st	%t5, [%dst - (offset) - 0x0c]; \
	st	%t6, [%dst - (offset) - 0x08]; \
	st	%t7, [%dst - (offset) - 0x04];

#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - (offset) - 0x20], %t0; \
	ldd	[%src - (offset) - 0x18], %t2; \
	ldd	[%src - (offset) - 0x10], %t4; \
	ldd	[%src - (offset) - 0x08], %t6; \
	std	%t0, [%dst - (offset) - 0x20]; \
	std	%t2, [%dst - (offset) - 0x18]; \
	std	%t4, [%dst - (offset) - 0x10]; \
	std	%t6, [%dst - (offset) - 0x08];

#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c];

#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src + (offset) + 0x00], %t0; \
	ldub	[%src + (offset) + 0x01], %t1; \
	stb	%t0, [%dst + (offset) + 0x00]; \
	stb	%t1, [%dst + (offset) + 0x01];

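/* SMOVE_CHUNK and SMOVE_ALIGNCHUNK copy 16 bytes whose alignment differs
 * from the destination's: each pair of words loaded with ldd is recombined
 * with srl/sll/or (shift counts shir/shil, the spill bits carried in %prev)
 * before being stored with std.  Nothing below in this file references
 * them; they are kept as-is.
 */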
#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	srl	%t0, shir, %t5; \
	srl	%t1, shir, %t6; \
	sll	%t0, shil, %t0; \
	or	%t5, %prev, %t5; \
	sll	%t1, shil, %prev; \
	or	%t6, %t0, %t0; \
	srl	%t2, shir, %t1; \
	srl	%t3, shir, %t6; \
	sll	%t2, shil, %t2; \
	or	%t1, %prev, %t1; \
	std	%t4, [%dst + (offset) + (offset2) - 0x04]; \
	std	%t0, [%dst + (offset) + (offset2) + 0x04]; \
	sll	%t3, shil, %prev; \
	or	%t6, %t2, %t4;

#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	srl	%t0, shir, %t4; \
	srl	%t1, shir, %t5; \
	sll	%t0, shil, %t6; \
	or	%t4, %prev, %t0; \
	sll	%t1, shil, %prev; \
	or	%t5, %t6, %t1; \
	srl	%t2, shir, %t4; \
	srl	%t3, shir, %t5; \
	sll	%t2, shil, %t6; \
	or	%t4, %prev, %t2; \
	sll	%t3, shil, %prev; \
	or	%t5, %t6, %t3; \
	std	%t0, [%dst + (offset) + (offset2) + 0x00]; \
	std	%t2, [%dst + (offset) + (offset2) + 0x08];

	.text
	.align	4

0:
	retl
	 nop		! Only bcopy returns here and it returns void...

#ifdef __KERNEL__
FUNC(amemmove)
FUNC(__memmove)
EXPORT_SYMBOL(__memmove)
#endif
FUNC(memmove)
EXPORT_SYMBOL(memmove)
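	/* %o0 = dst, %o1 = src, %o2 = len.  dst is kept in %g7 for the
	 * return value.  If dst <= src, or if the regions do not overlap
	 * (src + len <= dst), a forward copy is safe, so jump into memcpy
	 * at 9f/0f; %o4 = dst - src and the alignment test are already done
	 * in the branch delay slots.  Otherwise copy backwards one byte at
	 * a time, starting from the last byte.
	 */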
	cmp	%o0, %o1
	mov	%o0, %g7
	bleu	9f
	 sub	%o0, %o1, %o4

	add	%o1, %o2, %o3
	cmp	%o3, %o0
	bleu	0f
	 andcc	%o4, 3, %o5

	add	%o1, %o2, %o1
	add	%o0, %o2, %o0
	sub	%o1, 1, %o1
	sub	%o0, 1, %o0

1:	/* reverse_bytes */

	ldub	[%o1], %o4
	subcc	%o2, 1, %o2
	stb	%o4, [%o0]
	sub	%o1, 1, %o1
	bne	1b
	 sub	%o0, 1, %o0

	retl
	 mov	%g7, %o0

/* NOTE: This code is executed just for the cases,
 *       where %src (=%o1) & 3 is != 0.
 *	 We need to align it to 4. So, for (%src & 3)
 *	 1 we need to do ldub,lduh
 *	 2 lduh
 *	 3 just ldub
 *       so even if it looks weird, the branches
 *       are correct here. -jj
 */
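/* (The "bne 3f" below tests the condition codes set by the
 * "andcc %o1, 2, %g0" executed in the delay slot of "be 4f"; the
 * intervening ldub/add/stb/sub do not touch the flags.)
 */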
78:	/* dword_align */

	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0
4:
	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
EXPORT_SYMBOL(memcpy)

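	/* Dispatch: if (dst - src) & 3 != 0 the two pointers can never be
	 * word-aligned at the same time, so use the shift-and-merge path at
	 * 86f.  Short copies (len <= 15) go to 90f.  If src is not yet
	 * word-aligned, 78b copies 1-3 bytes first; since dst - src is a
	 * multiple of 4 here, that aligns dst as well.
	 */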
	sub	%o0, %o1, %o4
	mov	%o0, %g7
9:
	andcc	%o4, 3, %o5
0:
	bne	86f
	 cmp	%o2, 15

	bleu	90f
	 andcc	%o1, 3, %g0

	bne	78b
3:
	andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	ld	[%o1], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g0
	be	3f
	 andcc	%o0, 4, %g0

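	/* At least 128 bytes remain.  If dst is already doubleword-aligned,
	 * enter the ldd/std loop at 82f + 4: the first ldd of MOVE_BIGCHUNK
	 * below runs in the branch delay slot and is identical to the first
	 * insn of MOVE_BIGALIGNCHUNK, which is why both macros must start
	 * with the same instruction.
	 */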
	be	82f + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	sub	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g4
	be	80f
	 andcc	%g1, 8, %g0

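	/* Computed jump into the MOVE_LASTCHUNK table: %g4 holds the
	 * remaining whole 16-byte blocks (len & 0x70).  Each table entry is
	 * six instructions (24 bytes) and copies 16 bytes, so the entry
	 * point is 80f - (%g4 + %g4/2).  Pointers are advanced by %g4 first
	 * because the entries use negative offsets.  The "andcc %g1, 8"
	 * above survives in the condition codes for the test at 80:.
	 */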
	sethi	%hi(80f), %o5
	srl	%g4, 1, %o4
	add	%g4, %o4, %o4
	add	%o1, %g4, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(80f), %g0
	 add	%o0, %g4, %o0

79:	/* memcpy_table */

	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80:	/* memcpy_table_end */
	be	81f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	st	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
	st	%g3, [%o0 - 0x04]

81:	/* memcpy_last7 */

	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

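	/* Same 128-bytes-per-iteration loop as at 5: above, entered when
	 * dst is doubleword-aligned as well, so the stores can use std in
	 * addition to the ldd loads.
	 */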
82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g1, 128, %g1
	add	%o1, 128, %o1
	cmp	%g1, 128
	bge	82b
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g4
	be	84f
	 andcc	%g1, 8, %g0

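	/* Computed jump into the MOVE_LASTALIGNCHUNK table.  Each entry is
	 * four instructions (16 bytes) and copies 16 bytes, so the offset
	 * back from 84f is simply %g4.
	 */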
	sethi	%hi(84f), %o5
	add	%o1, %g4, %o1
	sub	%o5, %g4, %o5
	jmpl	%o5 + %lo(84f), %g0
	 add	%o0, %g4, %o0

83:	/* amemcpy_table */

	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84:	/* amemcpy_table_end */
	be	85f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	std	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
85:	/* amemcpy_last7 */
	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

86:	/* non_aligned */
	cmp	%o2, 6
	bleu	88f
	 nop

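	/* src and dst are misaligned relative to each other and len > 6.
	 * Open a register window, byte-copy until dst is word-aligned,
	 * then read aligned source words and merge each adjacent pair with
	 * sll/srl/or (%g4 = (src & 3) * 8, %l0 = 32 - %g4) before storing
	 * whole words to dst.  The final 1-3 bytes are copied individually
	 * before ret/restore returns dst from %g7.
	 */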
	save	%sp, -96, %sp
	andcc	%i0, 3, %g0
	be	61f
	 andcc	%i0, 1, %g0
	be	60f
	 andcc	%i0, 2, %g0

	ldub	[%i1], %g5
	add	%i1, 1, %i1
	stb	%g5, [%i0]
	sub	%i2, 1, %i2
	bne	61f
	 add	%i0, 1, %i0
60:
	ldub	[%i1], %g3
	add	%i1, 2, %i1
	stb	%g3, [%i0]
	sub	%i2, 2, %i2
	ldub	[%i1 - 1], %g3
	add	%i0, 2, %i0
	stb	%g3, [%i0 - 1]
61:
	and	%i1, 3, %g2
	and	%i2, 0xc, %g3
	and	%i1, -4, %i1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	 sub	%g2, %g4, %l0

	blu	3f
	 cmp	%g3, 0x8

	be	2f
	 srl	%i2, 2, %g3

	ld	[%i1], %i3
	add	%i0, -8, %i0
	ld	[%i1 + 4], %i4
	b	8f
	 add	%g3, 1, %g3
2:
	ld	[%i1], %i4
	add	%i0, -12, %i0
	ld	[%i1 + 4], %i5
	add	%g3, 2, %g3
	b	9f
	 add	%i1, -4, %i1
3:
	ld	[%i1], %g1
	add	%i0, -4, %i0
	ld	[%i1 + 4], %i3
	srl	%i2, 2, %g3
	b	7f
	 add	%i1, 4, %i1
4:
	ld	[%i1], %i5
	cmp	%i2, 7
	ld	[%i1 + 4], %g1
	srl	%i2, 2, %g3
	bleu	10f
	 add	%i1, 8, %i1

	ld	[%i1], %i3
	add	%g3, -1, %g3
5:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0]
7:
	ld	[%i1 + 4], %i4
	sll	%g1, %g4, %g2
	srl	%i3, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 4]
8:
	ld	[%i1 + 8], %i5
	sll	%i3, %g4, %g2
	srl	%i4, %l0, %g5
	or	%g2, %g5, %g2
	st	%g2, [%i0 + 8]
9:
	ld	[%i1 + 12], %g1
	sll	%i4, %g4, %g2
	srl	%i5, %l0, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%i1, 16, %i1
	st	%g2, [%i0 + 12]
	add	%i0, 16, %i0
	bne,a	5b
	 ld	[%i1], %i3
10:
	sll	%i5, %g4, %g2
	srl	%g1, %l0, %g5
	srl	%l0, 3, %g3
	or	%g2, %g5, %g2
	sub	%i1, %g3, %i1
	andcc	%i2, 2, %g0
	st	%g2, [%i0]
	be	1f
	 andcc	%i2, 1, %g0

	ldub	[%i1], %g2
	add	%i1, 2, %i1
	stb	%g2, [%i0 + 4]
	add	%i0, 2, %i0
	ldub	[%i1 - 1], %g2
	stb	%g2, [%i0 + 3]
1:
	be	1f
	 nop
	ldub	[%i1], %g2
	stb	%g2, [%i0 + 4]
1:
	ret
	 restore %g7, %g0, %o0

88:	/* short_end */

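	/* Computed jump for short copies (len <= 15): %o3 = len & 0xe, and
	 * each MOVE_SHORTCHUNK entry is four instructions (16 bytes)
	 * copying 2 bytes, hence the sll by 3.  The delay-slot
	 * "andcc %o2, 1" decides at 89: whether one last byte remains.
	 */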
	and	%o2, 0xe, %o3
20:
	sethi	%hi(89f), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(89f), %g0
	 andcc	%o2, 1, %g0

	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89:	/* short_table_end */

	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:
	retl
	 mov	%g7, %o0

90:	/* short_aligned_end */
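	/* len <= 15 and (dst - src) & 3 == 0.  The flags still hold
	 * "andcc %o1, 3" from the dispatch above: if src is not
	 * word-aligned, fall back to the byte/halfword copy at 88b.
	 * Otherwise copy an 8-byte block if bit 3 of len is set, then let
	 * memcpy_last7 (81b) finish the remaining 4/2/1 bytes with
	 * %g1 = len.
	 */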
	bne	88b
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	ld	[%o1 + 0x00], %g2
	ld	[%o1 + 0x04], %g3
	add	%o1, 8, %o1
	st	%g2, [%o0 + 0x00]
	st	%g3, [%o0 + 0x04]
	add	%o0, 8, %o0
1:
	b	81b
	 mov	%o2, %g1