Merge branch 'linus' into x86/cleanups
[deliverable/linux.git] / arch / x86 / lib / copy_user_nocache_64.S
CommitLineData
0812a579
AK
1/* Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2.
3 *
4 * Functions to copy from and to user space.
5 */
6
7#include <linux/linkage.h>
8#include <asm/dwarf2.h>
9
10#define FIX_ALIGNMENT 1
11
12#include <asm/current.h>
13#include <asm/asm-offsets.h>
14#include <asm/thread_info.h>
15#include <asm/cpufeature.h>
16
17/*
18 * copy_user_nocache - Uncached memory copy with exception handling
19 * This will force destination/source out of cache for more performance.
20 *
21 * Input:
22 * rdi destination
23 * rsi source
24 * rdx count
25 * rcx zero flag when 1 zero on exception
26 *
27 * Output:
28 * eax uncopied bytes or 0 if successful.
29 */
30ENTRY(__copy_user_nocache)
31 CFI_STARTPROC
32 pushq %rbx
33 CFI_ADJUST_CFA_OFFSET 8
34 CFI_REL_OFFSET rbx, 0
35 pushq %rcx /* save zero flag */
36 CFI_ADJUST_CFA_OFFSET 8
37 CFI_REL_OFFSET rcx, 0
38
39 xorl %eax,%eax /* zero for the exception handler */
40
41#ifdef FIX_ALIGNMENT
42 /* check for bad alignment of destination */
43 movl %edi,%ecx
44 andl $7,%ecx
45 jnz .Lbad_alignment
46.Lafter_bad_alignment:
47#endif
48
49 movq %rdx,%rcx
50
51 movl $64,%ebx
52 shrq $6,%rdx
53 decq %rdx
54 js .Lhandle_tail
55
56 .p2align 4
57.Lloop:
58.Ls1: movq (%rsi),%r11
59.Ls2: movq 1*8(%rsi),%r8
60.Ls3: movq 2*8(%rsi),%r9
61.Ls4: movq 3*8(%rsi),%r10
62.Ld1: movnti %r11,(%rdi)
63.Ld2: movnti %r8,1*8(%rdi)
64.Ld3: movnti %r9,2*8(%rdi)
65.Ld4: movnti %r10,3*8(%rdi)
66
67.Ls5: movq 4*8(%rsi),%r11
68.Ls6: movq 5*8(%rsi),%r8
69.Ls7: movq 6*8(%rsi),%r9
70.Ls8: movq 7*8(%rsi),%r10
71.Ld5: movnti %r11,4*8(%rdi)
72.Ld6: movnti %r8,5*8(%rdi)
73.Ld7: movnti %r9,6*8(%rdi)
74.Ld8: movnti %r10,7*8(%rdi)
75
76 dec %rdx
77
78 leaq 64(%rsi),%rsi
79 leaq 64(%rdi),%rdi
80
81 jns .Lloop
82
83 .p2align 4
84.Lhandle_tail:
85 movl %ecx,%edx
86 andl $63,%ecx
87 shrl $3,%ecx
88 jz .Lhandle_7
89 movl $8,%ebx
90 .p2align 4
91.Lloop_8:
92.Ls9: movq (%rsi),%r8
93.Ld9: movnti %r8,(%rdi)
94 decl %ecx
95 leaq 8(%rdi),%rdi
96 leaq 8(%rsi),%rsi
97 jnz .Lloop_8
98
99.Lhandle_7:
100 movl %edx,%ecx
101 andl $7,%ecx
102 jz .Lende
103 .p2align 4
104.Lloop_1:
105.Ls10: movb (%rsi),%bl
106.Ld10: movb %bl,(%rdi)
107 incq %rdi
108 incq %rsi
109 decl %ecx
110 jnz .Lloop_1
111
112 CFI_REMEMBER_STATE
113.Lende:
114 popq %rcx
115 CFI_ADJUST_CFA_OFFSET -8
116 CFI_RESTORE %rcx
117 popq %rbx
118 CFI_ADJUST_CFA_OFFSET -8
119 CFI_RESTORE rbx
df1bdc06 120 sfence
0812a579
AK
121 ret
122 CFI_RESTORE_STATE
123
124#ifdef FIX_ALIGNMENT
125 /* align destination */
126 .p2align 4
127.Lbad_alignment:
128 movl $8,%r9d
129 subl %ecx,%r9d
130 movl %r9d,%ecx
131 cmpq %r9,%rdx
132 jz .Lhandle_7
133 js .Lhandle_7
134.Lalign_1:
135.Ls11: movb (%rsi),%bl
136.Ld11: movb %bl,(%rdi)
137 incq %rsi
138 incq %rdi
139 decl %ecx
140 jnz .Lalign_1
141 subq %r9,%rdx
142 jmp .Lafter_bad_alignment
143#endif
144
145 /* table sorted by exception address */
146 .section __ex_table,"a"
147 .align 8
42a886af
LT
148 .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */
149 .quad .Ls2,.Ls1e
150 .quad .Ls3,.Ls1e
151 .quad .Ls4,.Ls1e
152 .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */
0812a579
AK
153 .quad .Ld2,.Ls2e
154 .quad .Ld3,.Ls3e
155 .quad .Ld4,.Ls4e
42a886af
LT
156 .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */
157 .quad .Ls6,.Ls5e
158 .quad .Ls7,.Ls5e
159 .quad .Ls8,.Ls5e
160 .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */
0812a579
AK
161 .quad .Ld6,.Ls6e
162 .quad .Ld7,.Ls7e
163 .quad .Ld8,.Ls8e
164 .quad .Ls9,.Le_quad
165 .quad .Ld9,.Le_quad
166 .quad .Ls10,.Le_byte
167 .quad .Ld10,.Le_byte
168#ifdef FIX_ALIGNMENT
169 .quad .Ls11,.Lzero_rest
170 .quad .Ld11,.Lzero_rest
171#endif
172 .quad .Le5,.Le_zero
173 .previous
174
0812a579 175 /* eax: zero, ebx: 64 */
42a886af 176.Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
0812a579
AK
177.Ls2e: addl $8,%eax
178.Ls3e: addl $8,%eax
179.Ls4e: addl $8,%eax
180.Ls5e: addl $8,%eax
181.Ls6e: addl $8,%eax
182.Ls7e: addl $8,%eax
183.Ls8e: addl $8,%eax
184 addq %rbx,%rdi /* +64 */
185 subq %rax,%rdi /* correct destination with computed offset */
186
187 shlq $6,%rdx /* loop counter * 64 (stride length) */
188 addq %rax,%rdx /* add offset to loopcnt */
189 andl $63,%ecx /* remaining bytes */
190 addq %rcx,%rdx /* add them */
191 jmp .Lzero_rest
192
193 /* exception on quad word loop in tail handling */
194 /* ecx: loopcnt/8, %edx: length, rdi: correct */
195.Le_quad:
196 shll $3,%ecx
197 andl $7,%edx
198 addl %ecx,%edx
199 /* edx: bytes to zero, rdi: dest, eax:zero */
200.Lzero_rest:
201 cmpl $0,(%rsp) /* zero flag set? */
202 jz .Le_zero
203 movq %rdx,%rcx
204.Le_byte:
205 xorl %eax,%eax
206.Le5: rep
207 stosb
208 /* when there is another exception while zeroing the rest just return */
209.Le_zero:
210 movq %rdx,%rax
211 jmp .Lende
212 CFI_ENDPROC
213ENDPROC(__copy_user_nocache)
214
215
This page took 0.169631 seconds and 5 git commands to generate.