/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 *	rdi  source
 *	rsi  destination
 *	edx  len (32bit)
 *	ecx  sum (32bit)
 *	r8   src_err_ptr (int)
 *	r9   dst_err_ptr (int)
 *
 * Output
 *	eax  64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
31 | ||
32 | .macro source | |
33 | 10: | |
015e6f11 | 34 | _ASM_EXTABLE(10b, .Lbad_source) |
1da177e4 | 35 | .endm |
2c76397b | 36 | |
1da177e4 LT |
37 | .macro dest |
38 | 20: | |
015e6f11 | 39 | _ASM_EXTABLE(20b, .Lbad_dest) |
1da177e4 | 40 | .endm |
2c76397b | 41 | |
1da177e4 LT |
42 | .macro ignore L=.Lignore |
43 | 30: | |
015e6f11 | 44 | _ASM_EXTABLE(30b, \L) |
1da177e4 | 45 | .endm |
2c76397b IM |
46 | |
47 | ||
8d379dad | 48 | ENTRY(csum_partial_copy_generic) |
2c76397b IM |
49 | cmpl $3*64, %edx |
50 | jle .Lignore | |
1da177e4 | 51 | |
2c76397b IM |
52 | .Lignore: |
53 | subq $7*8, %rsp | |
2c76397b | 54 | movq %rbx, 2*8(%rsp) |
2c76397b | 55 | movq %r12, 3*8(%rsp) |
2c76397b | 56 | movq %r14, 4*8(%rsp) |
2c76397b | 57 | movq %r13, 5*8(%rsp) |
2c76397b | 58 | movq %rbp, 6*8(%rsp) |
1da177e4 | 59 | |
2c76397b IM |
60 | movq %r8, (%rsp) |
61 | movq %r9, 1*8(%rsp) | |
1da177e4 | 62 | |
2c76397b IM |
63 | movl %ecx, %eax |
64 | movl %edx, %ecx | |
1da177e4 | 65 | |
2c76397b IM |
66 | xorl %r9d, %r9d |
67 | movq %rcx, %r12 | |
68 | ||
69 | shrq $6, %r12 | |
70 | jz .Lhandle_tail /* < 64 */ | |
1da177e4 LT |
71 | |
72 | clc | |
2c76397b | 73 | |
1da177e4 LT |
74 | /* main loop. clear in 64 byte blocks */ |
75 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | |
76 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | |
77 | /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ | |
78 | .p2align 4 | |
79 | .Lloop: | |
80 | source | |
2c76397b | 81 | movq (%rdi), %rbx |
1da177e4 | 82 | source |
2c76397b | 83 | movq 8(%rdi), %r8 |
1da177e4 | 84 | source |
2c76397b | 85 | movq 16(%rdi), %r11 |
1da177e4 | 86 | source |
2c76397b | 87 | movq 24(%rdi), %rdx |
1da177e4 LT |
88 | |
89 | source | |
2c76397b | 90 | movq 32(%rdi), %r10 |
1da177e4 | 91 | source |
2c76397b | 92 | movq 40(%rdi), %rbp |
1da177e4 | 93 | source |
2c76397b | 94 | movq 48(%rdi), %r14 |
1da177e4 | 95 | source |
2c76397b IM |
96 | movq 56(%rdi), %r13 |
97 | ||
1da177e4 LT |
98 | ignore 2f |
99 | prefetcht0 5*64(%rdi) | |
2c76397b IM |
100 | 2: |
101 | adcq %rbx, %rax | |
102 | adcq %r8, %rax | |
103 | adcq %r11, %rax | |
104 | adcq %rdx, %rax | |
105 | adcq %r10, %rax | |
106 | adcq %rbp, %rax | |
107 | adcq %r14, %rax | |
108 | adcq %r13, %rax | |
1da177e4 LT |
109 | |
110 | decl %r12d | |
2c76397b | 111 | |
1da177e4 | 112 | dest |
2c76397b | 113 | movq %rbx, (%rsi) |
1da177e4 | 114 | dest |
2c76397b | 115 | movq %r8, 8(%rsi) |
1da177e4 | 116 | dest |
2c76397b | 117 | movq %r11, 16(%rsi) |
1da177e4 | 118 | dest |
2c76397b | 119 | movq %rdx, 24(%rsi) |
1da177e4 LT |
120 | |
121 | dest | |
2c76397b | 122 | movq %r10, 32(%rsi) |
1da177e4 | 123 | dest |
2c76397b | 124 | movq %rbp, 40(%rsi) |
1da177e4 | 125 | dest |
2c76397b | 126 | movq %r14, 48(%rsi) |
1da177e4 | 127 | dest |
2c76397b IM |
128 | movq %r13, 56(%rsi) |
129 | ||
1da177e4 | 130 | 3: |
1da177e4 | 131 | |
2c76397b IM |
132 | leaq 64(%rdi), %rdi |
133 | leaq 64(%rsi), %rsi | |
1da177e4 | 134 | |
2c76397b IM |
135 | jnz .Lloop |
136 | ||
137 | adcq %r9, %rax | |
1da177e4 | 138 | |
0d2eb44f | 139 | /* do last up to 56 bytes */ |
1da177e4 LT |
140 | .Lhandle_tail: |
141 | /* ecx: count */ | |
2c76397b IM |
142 | movl %ecx, %r10d |
143 | andl $63, %ecx | |
144 | shrl $3, %ecx | |
145 | jz .Lfold | |
1da177e4 LT |
146 | clc |
147 | .p2align 4 | |
2c76397b | 148 | .Lloop_8: |
1da177e4 | 149 | source |
2c76397b IM |
150 | movq (%rdi), %rbx |
151 | adcq %rbx, %rax | |
1da177e4 LT |
152 | decl %ecx |
153 | dest | |
2c76397b IM |
154 | movq %rbx, (%rsi) |
155 | leaq 8(%rsi), %rsi /* preserve carry */ | |
156 | leaq 8(%rdi), %rdi | |
1da177e4 | 157 | jnz .Lloop_8 |
2c76397b | 158 | adcq %r9, %rax /* add in carry */ |
1da177e4 LT |
159 | |
160 | .Lfold: | |
161 | /* reduce checksum to 32bits */ | |
2c76397b IM |
162 | movl %eax, %ebx |
163 | shrq $32, %rax | |
164 | addl %ebx, %eax | |
165 | adcl %r9d, %eax | |
1da177e4 | 166 | |
2c76397b | 167 | /* do last up to 6 bytes */ |
1da177e4 | 168 | .Lhandle_7: |
2c76397b IM |
169 | movl %r10d, %ecx |
170 | andl $7, %ecx | |
171 | shrl $1, %ecx | |
1da177e4 | 172 | jz .Lhandle_1 |
2c76397b IM |
173 | movl $2, %edx |
174 | xorl %ebx, %ebx | |
175 | clc | |
1da177e4 | 176 | .p2align 4 |
2c76397b | 177 | .Lloop_1: |
1da177e4 | 178 | source |
2c76397b IM |
179 | movw (%rdi), %bx |
180 | adcl %ebx, %eax | |
1da177e4 | 181 | decl %ecx |
92ed0223 | 182 | dest |
2c76397b IM |
183 | movw %bx, (%rsi) |
184 | leaq 2(%rdi), %rdi | |
185 | leaq 2(%rsi), %rsi | |
1da177e4 | 186 | jnz .Lloop_1 |
2c76397b IM |
187 | adcl %r9d, %eax /* add in carry */ |
188 | ||
1da177e4 LT |
189 | /* handle last odd byte */ |
190 | .Lhandle_1: | |
3e1aa7cb | 191 | testb $1, %r10b |
1da177e4 | 192 | jz .Lende |
2c76397b | 193 | xorl %ebx, %ebx |
1da177e4 | 194 | source |
2c76397b | 195 | movb (%rdi), %bl |
1da177e4 | 196 | dest |
2c76397b IM |
197 | movb %bl, (%rsi) |
198 | addl %ebx, %eax | |
199 | adcl %r9d, %eax /* carry */ | |
200 | ||
1da177e4 | 201 | .Lende: |
2c76397b | 202 | movq 2*8(%rsp), %rbx |
2c76397b | 203 | movq 3*8(%rsp), %r12 |
2c76397b | 204 | movq 4*8(%rsp), %r14 |
2c76397b | 205 | movq 5*8(%rsp), %r13 |
2c76397b | 206 | movq 6*8(%rsp), %rbp |
2c76397b | 207 | addq $7*8, %rsp |
1da177e4 LT |
208 | ret |
209 | ||
210 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | |
211 | .Lbad_source: | |
2c76397b IM |
212 | movq (%rsp), %rax |
213 | testq %rax, %rax | |
1da177e4 | 214 | jz .Lende |
2c76397b | 215 | movl $-EFAULT, (%rax) |
1da177e4 | 216 | jmp .Lende |
2c76397b | 217 | |
1da177e4 | 218 | .Lbad_dest: |
2c76397b IM |
219 | movq 8(%rsp), %rax |
220 | testq %rax, %rax | |
221 | jz .Lende | |
222 | movl $-EFAULT, (%rax) | |
1da177e4 | 223 | jmp .Lende |
8d379dad | 224 | ENDPROC(csum_partial_copy_generic) |