Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* copy_user.S: Sparc optimized copy_from_user and copy_to_user code. |
2 | * | |
3 | * Copyright(C) 1995 Linus Torvalds | |
4 | * Copyright(C) 1996 David S. Miller | |
5 | * Copyright(C) 1996 Eddie C. Dost | |
6 | * Copyright(C) 1996,1998 Jakub Jelinek | |
7 | * | |
8 | * derived from: | |
9 | * e-mail between David and Eddie. | |
10 | * | |
11 | * Returns 0 if successful, otherwise count of bytes not copied yet | |
12 | */ | |
13 | ||
14 | #include <asm/ptrace.h> | |
15 | #include <asm/asmmacro.h> | |
16 | #include <asm/page.h> | |
3a1d5c84 | 17 | #include <asm/thread_info.h> |
d3867f04 | 18 | #include <asm/export.h> |
1da177e4 LT |
19 | |
20 | /* Work around cpp -rob: the '#'-prefixed section flags are spelled through object-like macros so they can be used inside the function-like macros below (presumably because a bare '#' there would be taken as the stringify operator - confirm) */ | |
21 | #define ALLOC #alloc | |
22 | #define EXECINSTR #execinstr | |
23 | #define EX(x,y,a,b) /* guard one access "x,y"; on a fault the stub computes "a, b, %g3" and leaves via fixupretl */ \ | |
24 | 98: x,y; /* the access that may fault */ \ | |
25 | .section .fixup,ALLOC,EXECINSTR; \ | |
26 | .align 4; \ | |
27 | 99: ba fixupretl; /* fixup stub: common exit path */ \ | |
28 | a, b, %g3; /* delay slot: %g3 is what fixupretl hands back in %o0 */ \ | |
29 | .section __ex_table,ALLOC; \ | |
30 | .align 4; \ | |
31 | .word 98b, 99b; /* table entry: (guarded insn, fixup stub) */ \ | |
32 | .text; \ | |
33 | .align 4 | |
34 | ||
35 | #define EX2(x,y,c,d,e,a,b) /* like EX, but the fixup stub first executes "c, d, e" */ \ | |
36 | 98: x,y; /* the access that may fault */ \ | |
37 | .section .fixup,ALLOC,EXECINSTR; \ | |
38 | .align 4; \ | |
39 | 99: c, d, e; /* extra insn executed before the branch */ \ | |
40 | ba fixupretl; \ | |
41 | a, b, %g3; /* delay slot: %g3 is what fixupretl hands back in %o0 */ \ | |
42 | .section __ex_table,ALLOC; \ | |
43 | .align 4; \ | |
44 | .word 98b, 99b; /* table entry: (guarded insn, fixup stub) */ \ | |
45 | .text; \ | |
46 | .align 4 | |
47 | ||
48 | #define EXO2(x,y) /* guard one access "x, y"; a fault goes to the shared label 97 in .fixup, which reports %o2 */ \ | |
49 | 98: x, y; /* the access that may fault */ \ | |
50 | .section __ex_table,ALLOC; \ | |
51 | .align 4; \ | |
52 | .word 98b, 97f; /* no private stub: all EXO2 sites share the handler at 97 */ \ | |
53 | .text; \ | |
54 | .align 4 | |
55 | ||
56 | #define EXT(start,end,handler) /* one exception table entry covering the insns between start and end */ \ | |
57 | .section __ex_table,ALLOC; \ | |
58 | .align 4; \ | |
59 | .word start, 0, end, handler; /* four words, second one 0; presumably the 0 marks a range entry for the fault code - confirm */ \ | |
60 | .text; \ | |
61 | .align 4 | |
62 | ||
63 | /* Please do not change the following macros unless you also change the | |
64 | * logic used in .fixup at the end of this file. | |
65 | */ | |
66 | ||
67 | /* Both these macros have to start with exactly the same insn: "be ldd_std + 4" in __copy_user runs the first insn of MOVE_BIGCHUNK in its delay slot and then enters MOVE_BIGALIGNCHUNK at its second insn */ | |
68 | #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) /* copy 32 bytes at offset: 4 ldd loads, then 8 word stores (12 insns) */ \ | |
69 | ldd [%src + (offset) + 0x00], %t0; /* each ldd names the first register of a pair */ \ | |
70 | ldd [%src + (offset) + 0x08], %t2; \ | |
71 | ldd [%src + (offset) + 0x10], %t4; \ | |
72 | ldd [%src + (offset) + 0x18], %t6; \ | |
73 | st %t0, [%dst + (offset) + 0x00]; \ | |
74 | st %t1, [%dst + (offset) + 0x04]; \ | |
75 | st %t2, [%dst + (offset) + 0x08]; \ | |
76 | st %t3, [%dst + (offset) + 0x0c]; \ | |
77 | st %t4, [%dst + (offset) + 0x10]; \ | |
78 | st %t5, [%dst + (offset) + 0x14]; \ | |
79 | st %t6, [%dst + (offset) + 0x18]; \ | |
80 | st %t7, [%dst + (offset) + 0x1c]; | |
81 | ||
82 | #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) /* copy 32 bytes at offset: 4 ldd loads, then 4 std stores (8 insns) */ \ | |
83 | ldd [%src + (offset) + 0x00], %t0; /* must stay identical to the first insn of MOVE_BIGCHUNK */ \ | |
84 | ldd [%src + (offset) + 0x08], %t2; \ | |
85 | ldd [%src + (offset) + 0x10], %t4; \ | |
86 | ldd [%src + (offset) + 0x18], %t6; \ | |
87 | std %t0, [%dst + (offset) + 0x00]; \ | |
88 | std %t2, [%dst + (offset) + 0x08]; \ | |
89 | std %t4, [%dst + (offset) + 0x10]; \ | |
90 | std %t6, [%dst + (offset) + 0x18]; | |
91 | ||
92 | #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) /* copy the 16 bytes that end offset bytes below src/dst: 2 ldd loads, then 4 word stores (6 insns) */ \ | |
93 | ldd [%src - (offset) - 0x10], %t0; \ | |
94 | ldd [%src - (offset) - 0x08], %t2; \ | |
95 | st %t0, [%dst - (offset) - 0x10]; \ | |
96 | st %t1, [%dst - (offset) - 0x0c]; \ | |
97 | st %t2, [%dst - (offset) - 0x08]; \ | |
98 | st %t3, [%dst - (offset) - 0x04]; | |
99 | ||
100 | #define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) /* copy 8 bytes at offset as halfwords: 4 lduh loads, then 4 sth stores (8 insns) */ \ | |
101 | lduh [%src + (offset) + 0x00], %t0; \ | |
102 | lduh [%src + (offset) + 0x02], %t1; \ | |
103 | lduh [%src + (offset) + 0x04], %t2; \ | |
104 | lduh [%src + (offset) + 0x06], %t3; \ | |
105 | sth %t0, [%dst + (offset) + 0x00]; \ | |
106 | sth %t1, [%dst + (offset) + 0x02]; \ | |
107 | sth %t2, [%dst + (offset) + 0x04]; \ | |
108 | sth %t3, [%dst + (offset) + 0x06]; | |
109 | ||
110 | #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) /* copy the 2 bytes that end offset bytes below src/dst: 2 ldub loads, then 2 stb stores (4 insns) */ \ | |
111 | ldub [%src - (offset) - 0x02], %t0; \ | |
112 | ldub [%src - (offset) - 0x01], %t1; \ | |
113 | stb %t0, [%dst - (offset) - 0x02]; \ | |
114 | stb %t1, [%dst - (offset) - 0x01]; | |
115 | ||
116 | .text | |
117 | .align 4 | |
118 | ||
119 | .globl __copy_user_begin | |
120 | __copy_user_begin: /* global marker placed before the copy code; __copy_user_end at the bottom of the file is its counterpart */ | |
121 | ||
122 | .globl __copy_user /* the entry point itself is defined further down, after dword_align */ | |
d3867f04 | 123 | EXPORT_SYMBOL(__copy_user) |
1da177e4 LT |
124 | dword_align: /* reached from __copy_user when src is not word aligned (dst has the same low two bits): copy 1, 2 or 3 bytes, then rejoin __copy_user at its label 3 */ |
125 | andcc %o1, 1, %g0 /* is src odd? */ | |
126 | be 4f /* even: one halfword is enough */ | |
127 | andcc %o1, 2, %g0 /* delay slot: these flags are consumed by the bne below */ | |
128 | ||
129 | EXO2(ldub [%o1], %g2) /* odd src: copy one byte */ | |
130 | add %o1, 1, %o1 | |
131 | EXO2(stb %g2, [%o0]) | |
132 | sub %o2, 1, %o2 /* keep %o2 exact: the EXO2 handler reports it as the uncopied count */ | |
133 | bne 3f /* bit 1 of the original src was set, so src is word aligned now */ | |
134 | add %o0, 1, %o0 /* delay slot */ | |
135 | ||
136 | EXO2(lduh [%o1], %g2) /* otherwise one more halfword is needed */ | |
137 | add %o1, 2, %o1 | |
138 | EXO2(sth %g2, [%o0]) | |
139 | sub %o2, 2, %o2 | |
140 | b 3f | |
141 | add %o0, 2, %o0 /* delay slot */ | |
142 | 4: | |
143 | EXO2(lduh [%o1], %g2) /* even but not word aligned src: copy one halfword */ | |
144 | add %o1, 2, %o1 | |
145 | EXO2(sth %g2, [%o0]) | |
146 | sub %o2, 2, %o2 | |
147 | b 3f | |
148 | add %o0, 2, %o0 /* delay slot */ | |
149 | ||
150 | __copy_user: /* in: %o0=dst %o1=src %o2=len; out: %o0 = 0 if successful, otherwise the count of bytes not copied */ | |
151 | xor %o0, %o1, %o4 | |
152 | 1: | |
153 | andcc %o4, 3, %o5 /* %o5 = (dst ^ src) & 3; cannot_optimize looks at it again */ | |
154 | 2: | |
155 | bne cannot_optimize /* dst and src differ in their low two bits */ | |
156 | cmp %o2, 15 /* delay slot: flags for the bleu that follows on either path */ | |
157 | ||
158 | bleu short_aligned_end /* len <= 15 */ | |
159 | andcc %o1, 3, %g0 /* delay slot: is src word aligned? */ | |
160 | ||
161 | bne dword_align /* no: copy 1-3 bytes there, then come back to label 3 */ | |
162 | 3: | |
163 | andcc %o1, 4, %g0 /* also the delay slot of the bne above */ | |
164 | ||
165 | be 2f /* src is already 8-byte aligned */ | |
166 | mov %o2, %g1 /* delay slot: %g1 = len; the chunk macros below reuse %o2 as a temporary */ | |
167 | ||
168 | EXO2(ld [%o1], %o4) /* copy one word so that src becomes 8-byte aligned */ | |
169 | sub %g1, 4, %g1 | |
170 | EXO2(st %o4, [%o0]) | |
171 | add %o1, 4, %o1 | |
172 | add %o0, 4, %o0 | |
173 | 2: | |
174 | andcc %g1, 0xffffff80, %g7 /* %g7 = bytes covered by whole 128-byte blocks */ | |
175 | be 3f /* none: go straight to the 16-byte chunk table */ | |
176 | andcc %o0, 4, %g0 /* delay slot: is dst 8-byte aligned as well? */ | |
177 | ||
178 | be ldd_std + 4 /* yes: use the std loop; its first insn is skipped because the identical insn at label 5 runs in this delay slot */ | |
179 | 5: | |
180 | MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | |
181 | MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | |
182 | MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | |
183 | MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | |
184 | 80: | |
185 | EXT(5b, 80b, 50f) /* faults in the four macros above are handled at label 50 in .fixup */ | |
186 | subcc %g7, 128, %g7 | |
187 | add %o1, 128, %o1 | |
188 | bne 5b /* more 128-byte blocks left */ | |
189 | add %o0, 128, %o0 /* delay slot */ | |
190 | 3: | |
191 | andcc %g1, 0x70, %g7 /* %g7 = bytes to move in 16-byte chunks (0 .. 0x70) */ | |
192 | be copy_user_table_end | |
193 | andcc %g1, 8, %g0 /* delay slot: flags consumed by the be at copy_user_table_end */ | |
194 | ||
195 | sethi %hi(copy_user_table_end), %o5 | |
196 | srl %g7, 1, %o4 | |
197 | add %g7, %o4, %o4 /* %o4 = %g7 * 3 / 2: each MOVE_LASTCHUNK is 6 insns (24 bytes of code) per 16 bytes of data */ | |
198 | add %o1, %g7, %o1 /* the table addresses backwards from the advanced pointers */ | |
199 | sub %o5, %o4, %o5 | |
200 | jmpl %o5 + %lo(copy_user_table_end), %g0 /* enter copy_user_table so that exactly %g7 bytes get copied */ | |
201 | add %o0, %g7, %o0 /* delay slot */ | |
202 | ||
203 | copy_user_table: /* entered part-way through by a computed jmpl; copies up to 7 chunks of 16 bytes that end at %o1/%o0 */ | |
204 | MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | |
205 | MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | |
206 | MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | |
207 | MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | |
208 | MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | |
209 | MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | |
210 | MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | |
211 | copy_user_table_end: | |
212 | EXT(copy_user_table, copy_user_table_end, 51f) /* faults inside the table are handled at label 51 in .fixup */ | |
213 | be copy_user_last7 /* (%g1 & 8) == 0, tested in the delay slot that preceded the jump here */ | |
214 | andcc %g1, 4, %g0 /* delay slot: flags for the first be in copy_user_last7 */ | |
215 | ||
216 | EX(ldd [%o1], %g2, and %g1, 0xf) /* copy 8 bytes; a fault reports %g1 & 0xf bytes left */ | |
217 | add %o0, 8, %o0 | |
218 | add %o1, 8, %o1 | |
219 | EX(st %g2, [%o0 - 0x08], and %g1, 0xf) | |
220 | EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4) /* first word already stored: report (%g1 & 0xf) - 4 */ | |
221 | copy_user_last7: /* copy the last 0-7 bytes selected by bits 2, 1 and 0 of %g1; expects the flags of "andcc %g1, 4" on entry */ | |
222 | be 1f /* bit 2 clear: no word to copy */ | |
223 | andcc %g1, 2, %g0 /* delay slot: flags for the next be */ | |
224 | ||
225 | EX(ld [%o1], %g2, and %g1, 7) /* a fault reports %g1 & 7 bytes left */ | |
226 | add %o1, 4, %o1 | |
227 | EX(st %g2, [%o0], and %g1, 7) | |
228 | add %o0, 4, %o0 | |
229 | 1: | |
230 | be 1f /* bit 1 clear: no halfword to copy */ | |
231 | andcc %g1, 1, %g0 /* delay slot: flags for the next be */ | |
232 | ||
233 | EX(lduh [%o1], %g2, and %g1, 3) /* a fault reports %g1 & 3 bytes left */ | |
234 | add %o1, 2, %o1 | |
235 | EX(sth %g2, [%o0], and %g1, 3) | |
236 | add %o0, 2, %o0 | |
237 | 1: | |
238 | be 1f /* bit 0 clear: nothing left */ | |
239 | nop | |
240 | ||
241 | EX(ldub [%o1], %g2, add %g0, 1) /* final byte; a fault reports 1 byte left */ | |
242 | EX(stb %g2, [%o0], add %g0, 1) | |
243 | 1: | |
244 | retl | |
245 | clr %o0 /* delay slot: return 0, everything was copied */ | |
246 | ||
247 | ldd_std: /* 128-byte block loop using std stores; first entered at ldd_std + 4 from __copy_user when dst is 8-byte aligned */ | |
248 | MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | |
249 | MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | |
250 | MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | |
251 | MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | |
252 | 81: | |
253 | EXT(ldd_std, 81b, 52f) /* faults in the four macros above are handled at label 52 in .fixup */ | |
254 | subcc %g7, 128, %g7 /* %g7 = bytes still to move in 128-byte blocks */ | |
255 | add %o1, 128, %o1 | |
256 | bne ldd_std | |
257 | add %o0, 128, %o0 /* delay slot */ | |
258 | ||
259 | andcc %g1, 0x70, %g7 /* %g7 = bytes to move in 16-byte chunks (0 .. 0x70) */ | |
260 | be copy_user_table_end | |
261 | andcc %g1, 8, %g0 /* delay slot: flags consumed by the be at copy_user_table_end */ | |
262 | ||
263 | sethi %hi(copy_user_table_end), %o5 | |
264 | srl %g7, 1, %o4 | |
265 | add %g7, %o4, %o4 /* %o4 = %g7 * 3 / 2: 24 bytes of table code per 16 bytes of data */ | |
266 | add %o1, %g7, %o1 | |
267 | sub %o5, %o4, %o5 | |
268 | jmpl %o5 + %lo(copy_user_table_end), %g0 /* enter copy_user_table so that exactly %g7 bytes get copied */ | |
269 | add %o0, %g7, %o0 /* delay slot */ | |
270 | ||
271 | cannot_optimize: /* dst and src differ in their low two bits; entered with the flags of "cmp %o2, 15" and %o5 = (dst ^ src) & 3 */ | |
272 | bleu short_end /* len <= 15: use the 2-byte table directly */ | |
273 | cmp %o5, 2 /* delay slot */ | |
274 | ||
275 | bne byte_chunk /* dst and src differ in bit 0: only byte copies are possible */ | |
276 | and %o2, 0xfffffff0, %o3 /* delay slot: %o3 = len rounded down to a multiple of 16 */ | |
277 | ||
278 | andcc %o1, 1, %g0 /* they differ only in bit 1: halfword copies work once src is even */ | |
279 | be 10f | |
280 | nop | |
281 | ||
282 | EXO2(ldub [%o1], %g2) /* odd src: copy one byte first */ | |
283 | add %o1, 1, %o1 | |
284 | EXO2(stb %g2, [%o0]) | |
285 | sub %o2, 1, %o2 | |
286 | andcc %o2, 0xfffffff0, %o3 /* recompute the 16-byte part for the shortened len */ | |
287 | be short_end /* fewer than 16 bytes left */ | |
288 | add %o0, 1, %o0 /* delay slot */ | |
289 | 10: | |
290 | MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | |
291 | MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5) | |
292 | 82: | |
293 | EXT(10b, 82b, 53f) /* faults in the two macros above are handled at label 53 in .fixup */ | |
294 | subcc %o3, 0x10, %o3 | |
295 | add %o1, 0x10, %o1 | |
296 | bne 10b | |
297 | add %o0, 0x10, %o0 /* delay slot */ | |
298 | b 2f /* continue at label 2 inside short_end */ | |
299 | and %o2, 0xe, %o3 /* delay slot: same value short_end computes on entry */ | |
300 | ||
301 | byte_chunk: /* byte-wise loop, 16 bytes per pass; %o3 = bytes left to move here (a multiple of 16); falls through into short_end */ | |
302 | MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3) | |
303 | MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3) | |
304 | MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3) | |
305 | MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3) | |
306 | MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3) | |
307 | MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3) | |
308 | MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3) | |
309 | MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3) | |
310 | 83: | |
311 | EXT(byte_chunk, 83b, 54f) /* faults in the eight macros above are handled at label 54 in .fixup */ | |
312 | subcc %o3, 0x10, %o3 | |
313 | add %o1, 0x10, %o1 | |
314 | bne byte_chunk | |
315 | add %o0, 0x10, %o0 /* delay slot */ | |
316 | ||
317 | short_end: /* copy the last (len & 0xe) bytes two at a time through a computed jump, then one odd byte if len is odd */ | |
318 | and %o2, 0xe, %o3 | |
319 | 2: | |
320 | sethi %hi(short_table_end), %o5 | |
321 | sll %o3, 3, %o4 /* %o4 = %o3 * 8: each MOVE_SHORTCHUNK is 4 insns (16 bytes of code) per 2 bytes of data */ | |
322 | add %o0, %o3, %o0 /* the table addresses backwards from the advanced pointers */ | |
323 | sub %o5, %o4, %o5 | |
324 | add %o1, %o3, %o1 | |
325 | jmpl %o5 + %lo(short_table_end), %g0 /* enter the table so that exactly %o3 bytes get copied */ | |
326 | andcc %o2, 1, %g0 /* delay slot: flags for the be at short_table_end */ | |
327 | 84: | |
328 | MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) | |
329 | MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) | |
330 | MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) | |
331 | MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) | |
332 | MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) | |
333 | MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) | |
334 | MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) | |
335 | short_table_end: | |
336 | EXT(84b, short_table_end, 55f) /* faults inside the table are handled at label 55 in .fixup */ | |
337 | be 1f /* len even: done */ | |
338 | nop | |
339 | EX(ldub [%o1], %g2, add %g0, 1) /* final odd byte; a fault reports 1 byte left */ | |
340 | EX(stb %g2, [%o0], add %g0, 1) | |
341 | 1: | |
342 | retl | |
343 | clr %o0 /* delay slot: return 0, everything was copied */ | |
344 | ||
345 | short_aligned_end: /* len <= 15 and dst, src agree in their low two bits; entered with the flags of "andcc %o1, 3" */ | |
346 | bne short_end /* src not word aligned: fall back to the 2-byte table */ | |
347 | andcc %o2, 8, %g0 /* delay slot */ | |
348 | ||
349 | be 1f /* bit 3 of len clear: no 8-byte piece */ | |
350 | andcc %o2, 4, %g0 /* delay slot: flags for the first be in copy_user_last7 */ | |
351 | ||
352 | EXO2(ld [%o1 + 0x00], %g2) /* copy 8 bytes as two words */ | |
353 | EXO2(ld [%o1 + 0x04], %g3) | |
354 | add %o1, 8, %o1 | |
355 | EXO2(st %g2, [%o0 + 0x00]) | |
356 | EX(st %g3, [%o0 + 0x04], sub %o2, 4) /* first word already stored: report len - 4 */ | |
357 | add %o0, 8, %o0 | |
358 | 1: | |
359 | b copy_user_last7 | |
360 | mov %o2, %g1 /* delay slot: copy_user_last7 reads the length bits from %g1 */ | |
361 | ||
362 | .section .fixup,#alloc,#execinstr | |
363 | .align 4 | |
364 | 97: /* shared handler for every EXO2 site: the uncopied count is %o2 */ | |
365 | mov %o2, %g3 | |
366 | fixupretl: /* common fault exit: %g3 = bytes not copied, %o0 = dst, %o1 = src; zeroes %g3 bytes at dst in one case, then returns %g3 */ | |
367 | sethi %hi(PAGE_OFFSET), %g1 | |
368 | cmp %o0, %g1 | |
369 | blu 1f /* dst below PAGE_OFFSET: return without zeroing */ | |
370 | cmp %o1, %g1 /* delay slot */ | |
371 | bgeu 1f /* src at or above PAGE_OFFSET: return without zeroing */ | |
3a1d5c84 DM |
372 | ld [%g6 + TI_PREEMPT], %g1 |
373 | cmp %g1, 0 | |
374 | bne 1f /* TI_PREEMPT field (loaded in the delay slot above) is non-zero: return without zeroing */ | |
1da177e4 LT |
375 | nop |
376 | save %sp, -64, %sp /* new register window so that __bzero can be called */ | |
377 | mov %i0, %o0 /* caller's %o0 (dst) becomes the first argument */ | |
378 | call __bzero | |
379 | mov %g3, %o1 /* delay slot: second argument = byte count. NOTE(review): %g3 is read again after the call, so __bzero must preserve it - confirm */ | |
380 | restore | |
381 | 1: retl | |
382 | mov %g3, %o0 /* delay slot: return the count of bytes not copied */ | |
383 | ||
384 | /* exception routine sets %g2 to (broken_insn - first_insn)>>2, i.e. the index of the faulting insn within the EXT range */ | |
385 | 50: | |
386 | /* Count how many bytes are left when a fault happens in the MOVE_BIGCHUNK | |
387 | * loop. Each macro is 12 insns: 4 ldd loads, then 8 st of 4 bytes each. | |
388 | * x = g2 % 12; | |
389 | * g3 = (g1 & 0x7f) + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4)); | |
390 | * o0 += (g2 / 12) * 32; | |
391 | */ | |
392 | cmp %g2, 12 | |
393 | add %o0, %g7, %o0 /* undone at label 60 with the reduced %g7, leaving o0 += (g2 / 12) * 32 */ | |
394 | bcs 1f /* fault in the first macro */ | |
395 | cmp %g2, 24 | |
396 | bcs 2f /* second macro */ | |
397 | cmp %g2, 36 | |
398 | bcs 3f /* third macro */ | |
399 | nop | |
400 | sub %g2, 12, %g2 /* fall-through chain: one step per completed macro */ | |
401 | sub %g7, 32, %g7 | |
402 | 3: sub %g2, 12, %g2 | |
403 | sub %g7, 32, %g7 | |
404 | 2: sub %g2, 12, %g2 | |
405 | sub %g7, 32, %g7 | |
406 | 1: cmp %g2, 4 /* here %g2 = x, the insn index inside the faulting macro */ | |
407 | bcs,a 60f /* fault on a load: nothing of this macro was stored */ | |
408 | clr %g2 /* annulled delay slot: runs only when the branch is taken */ | |
409 | sub %g2, 4, %g2 | |
410 | sll %g2, 2, %g2 /* %g2 = (x - 4) * 4 bytes already stored */ | |
411 | 60: and %g1, 0x7f, %g3 /* also the tail of handler 52 */ | |
412 | sub %o0, %g7, %o0 | |
413 | add %g3, %g7, %g3 | |
414 | ba fixupretl | |
415 | sub %g3, %g2, %g3 /* delay slot */ | |
416 | 51: | |
417 | /* Fault inside copy_user_table: 7 MOVE_LASTCHUNKs of 6 insns (2 ldd, 4 st). | |
418 | * i = 41 - g2; j = i % 6; r = (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16); | |
419 | * g3 = (g1 & 15) + r; o0 -= r; | |
420 | */ | |
421 | neg %g2 | |
422 | and %g1, 0xf, %g1 | |
423 | add %g2, 41, %g2 /* %g2 = i, the insn count from the fault to the end of the table */ | |
424 | add %o0, %g1, %o0 /* cancels the (g1 & 15) part of the final subtraction */ | |
425 | 1: cmp %g2, 6 | |
426 | bcs,a 2f /* %g2 = j now */ | |
427 | cmp %g2, 4 /* annulled delay slot: runs only when the branch is taken */ | |
428 | add %g1, 16, %g1 /* one whole chunk after the faulting one */ | |
429 | b 1b | |
430 | sub %g2, 6, %g2 /* delay slot */ | |
431 | 2: bcc,a 2f /* j >= 4: fault before the second store, whole 16 bytes counted as left */ | |
432 | mov 16, %g2 /* annulled delay slot: runs only when the branch is taken */ | |
433 | inc %g2 | |
434 | sll %g2, 2, %g2 /* (j + 1) * 4 */ | |
435 | 2: add %g1, %g2, %g3 | |
436 | ba fixupretl | |
437 | sub %o0, %g3, %o0 /* delay slot */ | |
438 | 52: | |
439 | /* Fault in the ldd_std loop: each MOVE_BIGALIGNCHUNK is 8 insns (4 ldd, 4 std). g3 = (g1 & 0x7f) + g7 - (g2 / 8) * 32 - ((g2 & 4) ? (g2 & 3) * 8 : 0); | |
440 | o0 += (g2 / 8) * 32; the final arithmetic is shared with handler 50 at label 60 */ | |
441 | andn %g2, 7, %g4 | |
442 | add %o0, %g7, %o0 /* undone at label 60 with the reduced %g7 */ | |
443 | andcc %g2, 4, %g0 /* fault on a store (insns 4-7 of the macro)? */ | |
444 | and %g2, 3, %g2 | |
445 | sll %g4, 2, %g4 /* %g4 = (g2 / 8) * 32 */ | |
446 | sll %g2, 3, %g2 /* %g2 = (g2 & 3) * 8 bytes already stored */ | |
447 | bne 60b /* store fault: keep %g2 */ | |
448 | sub %g7, %g4, %g7 /* delay slot, runs on both paths */ | |
449 | ba 60b /* load fault: nothing of this macro was stored */ | |
450 | clr %g2 /* delay slot */ | |
451 | 53: | |
452 | /* Fault in the MOVE_HALFCHUNK loop (2 macros of 4 lduh, 4 sth). g3 = o3 + (o2 & 15) - (g2 & 8) - ((g2 & 4) ? (g2 & 3) * 2 : 0); | |
453 | o0 += (g2 & 8) */ | |
454 | and %g2, 3, %g4 | |
455 | andcc %g2, 4, %g0 /* fault on a store (insns 4-7 of the macro)? */ | |
456 | and %g2, 8, %g2 /* 8 if the first macro already completed */ | |
457 | sll %g4, 1, %g4 /* (g2 & 3) * 2 bytes already stored by this macro */ | |
458 | be 1f /* load fault: do not count partial stores */ | |
459 | add %o0, %g2, %o0 /* delay slot, runs on both paths */ | |
460 | add %g2, %g4, %g2 | |
461 | 1: and %o2, 0xf, %g3 | |
462 | add %g3, %o3, %g3 | |
463 | ba fixupretl | |
464 | sub %g3, %g2, %g3 /* delay slot */ | |
465 | 54: | |
466 | /* Fault in the byte_chunk loop (8 MOVE_SHORTCHUNKs of 2 ldub, 2 stb). g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - ((g2 & 2) ? (g2 & 1) : 0); | |
467 | o0 += (g2 / 4) * 2 */ | |
468 | srl %g2, 2, %o4 | |
469 | and %g2, 1, %o5 | |
470 | srl %g2, 1, %g2 | |
471 | add %o4, %o4, %o4 /* %o4 = (g2 / 4) * 2 bytes copied by completed macros */ | |
472 | and %o5, %g2, %o5 /* 1 only when bits 0 and 1 of the index are both set, i.e. the second stb faulted */ | |
473 | and %o2, 0xf, %o2 | |
474 | add %o0, %o4, %o0 | |
475 | sub %o3, %o5, %o3 | |
476 | sub %o2, %o4, %o2 | |
477 | ba fixupretl | |
478 | add %o2, %o3, %g3 /* delay slot */ | |
479 | 55: | |
480 | /* Fault inside the short_end table: 7 MOVE_SHORTCHUNKs of 4 insns (ldub, ldub, stb, stb). i = 27 - g2; | |
481 | r = i / 4 * 2 + ((i & 3) ? 2 : 1); g3 = (o2 & 1) + r; | |
482 | o0 -= r. Only a fault on the second stb (i & 3 == 0) has already stored one byte of its pair */ | |
483 | neg %g2 | |
484 | and %o2, 1, %o2 /* the odd trailing byte, if any, is still uncopied as well */ | |
485 | add %g2, 27, %g2 /* %g2 = i, the insn count from the fault to the end of the table */ | |
486 | srl %g2, 2, %o5 | |
487 | andcc %g2, 3, %g0 | |
488 | mov 2, %g2 /* default: both bytes of the faulting pair are uncopied */ | |
489 | add %o5, %o5, %o5 /* %o5 = i / 4 * 2 bytes in the pairs after the faulting one */ | |
490 | be,a 1f | |
491 | mov 1, %g2 /* annulled delay slot, taken branch only: second stb faulted, one byte left */ | |
492 | 1: add %g2, %o5, %g3 | |
493 | sub %o0, %g3, %o0 /* rewind dst to the first uncopied byte */ | |
494 | ba fixupretl | |
495 | add %g3, %o2, %g3 /* delay slot */ | |
496 | ||
497 | .globl __copy_user_end | |
498 | __copy_user_end: /* global marker placed after the last fixup handler; counterpart of __copy_user_begin */ |