arch/x86/lib/copy_user_nocache_64.S
/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * Uses non-temporal (movnti) stores so the destination is written
 * without being pulled into the CPU cache.
 *
 * Input:
 * rdi	destination
 * rsi	source
 * rdx	count
 * rcx	zero flag: when non-zero, zero the rest of the destination on a fault
 *
 * Output:
 * eax	uncopied bytes, or 0 if successful.
 */
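
/*
 * Hypothetical caller sketch (not part of the original source): it only
 * illustrates the register interface documented above; kern_dst, user_src,
 * len and handle_short_copy are made-up symbols for illustration.
 *
 *	movq	kern_dst(%rip),%rdi	# destination buffer
 *	movq	user_src(%rip),%rsi	# source (user pointer)
 *	movq	len(%rip),%rdx		# byte count
 *	movl	$1,%ecx			# zero the destination tail on a fault
 *	call	__copy_user_nocache
 *	testl	%eax,%eax		# eax = bytes left uncopied
 *	jnz	handle_short_copy	# 0 means the whole copy succeeded
 */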
ENTRY(__copy_user_nocache)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx			/* save zero flag */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0

	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx

	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js .Lhandle_tail

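/*
 * Main copy loop: each iteration moves 64 bytes as eight quadwords,
 * loaded with ordinary movq and written with movnti (non-temporal
 * stores), so the destination data is not pulled into the cache.
 */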
	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movnti %r11,(%rdi)
.Ld2:	movnti %r8,1*8(%rdi)
.Ld3:	movnti %r9,2*8(%rdi)
.Ld4:	movnti %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movnti %r11,4*8(%rdi)
.Ld6:	movnti %r8,5*8(%rdi)
.Ld7:	movnti %r9,6*8(%rdi)
.Ld8:	movnti %r10,7*8(%rdi)

	dec %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns .Lloop

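/*
 * Fewer than 64 bytes remain: copy them in 8-byte quadwords first,
 * then finish byte by byte.
 */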
	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movnti %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	sfence
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz .Lhandle_7
	js .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx
	jmp .Lafter_bad_alignment
#endif

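/*
 * Exception table: each .quad pair names an instruction that may fault
 * on the user access and the fixup label the page fault handler jumps
 * to when that fault happens.
 */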
	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e	/* .Ls[1-4] - 0 bytes copied */
	.quad .Ls2,.Ls1e
	.quad .Ls3,.Ls1e
	.quad .Ls4,.Ls1e
	.quad .Ld1,.Ls1e	/* .Ld[1-4] - 0..24 bytes copied */
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e	/* .Ls[5-8] - 32 bytes copied */
	.quad .Ls6,.Ls5e
	.quad .Ls7,.Ls5e
	.quad .Ls8,.Ls5e
	.quad .Ld5,.Ls5e	/* .Ld[5-8] - 32..56 bytes copied */
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
#endif
	.quad .Le5,.Le_zero
	.previous

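/*
 * Fixups for faults in the main 64-byte loop.  The .Ls[1-8]e labels fall
 * through one another: a fault routed to .Ls1e executes all eight addl
 * instructions (eax += 64), one routed to .Ls8e only the last (eax += 8),
 * so eax ends up holding the bytes of the current block that were not
 * stored.  The code below then rewinds rdi to the first uncopied byte
 * and rebuilds the total remaining length in rdx.
 */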
	/* eax: zero, ebx: 64 */
.Ls1e:	addl $8,%eax		/* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
.Ls2e:	addl $8,%eax
.Ls3e:	addl $8,%eax
.Ls4e:	addl $8,%eax
.Ls5e:	addl $8,%eax
.Ls6e:	addl $8,%eax
.Ls7e:	addl $8,%eax
.Ls8e:	addl $8,%eax
	addq %rbx,%rdi		/* +64 */
	subq %rax,%rdi		/* correct destination with computed offset */

	shlq $6,%rdx		/* loop counter * 64 (stride length) */
	addq %rax,%rdx		/* add offset to loopcnt */
	andl $63,%ecx		/* remaining bytes */
	addq %rcx,%rdx		/* add them */
	jmp .Lzero_rest

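/*
 * Fixups for faults in the tail loops: .Le_quad rebuilds the remaining
 * byte count from the outstanding quadwords (ecx) and the byte remainder
 * of the length (edx); .Lzero_rest clears the uncopied destination bytes
 * with rep stosb, but only if the zero flag saved on the stack is set;
 * .Le_zero finally returns the uncopied count in eax.
 */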
	/* exception on quad word loop in tail handling */
	/* ecx: loopcnt/8, edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
	cmpl $0,(%rsp)		/* zero flag set? */
	jz .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest, just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende
	CFI_ENDPROC
ENDPROC(__copy_user_nocache)
