arch/x86/lib/csum-copy_64.S
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 * ecx	sum (32bit)
 * r8	src_err_ptr (int)
 * r9	dst_err_ptr (int)
 *
 * Output
 * eax	64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
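/*
 * Seen from C, the wrappers declare this roughly as
 * (cf. asm/checksum_64.h):
 *
 *	__wsum csum_partial_copy_generic(const void *src, void *dst,
 *					 int len, __wsum sum,
 *					 int *src_err_ptr, int *dst_err_ptr);
 */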

	.macro source
10:
	_ASM_EXTABLE(10b, .Lbad_source)
	.endm

	.macro dest
20:
	_ASM_EXTABLE(20b, .Lbad_dest)
	.endm

	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm


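/*
 * Each "source"/"dest" use places a local label on the instruction that
 * follows the macro and records an exception table entry for it, so a
 * fault on that access branches to .Lbad_source/.Lbad_dest instead of
 * oopsing.  "ignore" resumes at the supplied label, which lets the
 * prefetch in the main loop be skipped on a fault.
 */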
ENTRY(csum_partial_copy_generic)
	cmpl	$3*64, %edx
	jle	.Lignore

.Lignore:
	subq	$7*8, %rsp
	movq	%rbx, 2*8(%rsp)
	movq	%r12, 3*8(%rsp)
	movq	%r14, 4*8(%rsp)
	movq	%r13, 5*8(%rsp)
	movq	%rbp, 6*8(%rsp)

	movq	%r8, (%rsp)
	movq	%r9, 1*8(%rsp)

	movl	%ecx, %eax
	movl	%edx, %ecx

	xorl	%r9d, %r9d
	movq	%rcx, %r12

	shrq	$6, %r12
	jz	.Lhandle_tail		/* < 64 */
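	/*
	 * Register use from here on: rax = running sum (seeded from the
	 * sum argument), ecx = length, r9 = 0 (added with adc to fold
	 * carries back in), r12 = number of whole 64-byte blocks.
	 */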

	clc

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
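	/*
	 * CF stays live across the whole unrolled block: decl updates ZF
	 * for the loop branch but leaves CF alone, and the pointer
	 * updates use leaq, which does not touch the flags at all.
	 */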
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %rbp
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%rbp, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	decl	%r12d

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%rbp, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

3:

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Lloop

	adcq	%r9, %rax

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count */
	movl	%ecx, %r10d
	andl	$63, %ecx
	shrl	$3, %ecx
	jz	.Lfold
	clc
	.p2align 4
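	/*
	 * Sum and copy the remaining whole 8-byte words.  r10d keeps the
	 * original length so the word and byte passes below can pick out
	 * the sub-8-byte remainder.
	 */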
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi	/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax	/* add in carry */
1da177e4
LT
159
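	/*
	 * Fold the 64-bit accumulator down to 32 bits: add the high and
	 * low halves, then add the carry back in (end-around carry).
	 * Since 2^32 == 1 (mod 2^32 - 1), the folded value is equivalent
	 * to the full sum in one's-complement arithmetic.
	 */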
.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx
	shrq	$32, %rax
	addl	%ebx, %eax
	adcl	%r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
	shrl	$1, %ecx
	jz	.Lhandle_1
	movl	$2, %edx
	xorl	%ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb	$1, %r10b
	jz	.Lende
	xorl	%ebx, %ebx
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax	/* carry */

.Lende:
	movq	2*8(%rsp), %rbx
	movq	3*8(%rsp), %r12
	movq	4*8(%rsp), %r14
	movq	5*8(%rsp), %r13
	movq	6*8(%rsp), %rbp
	addq	$7*8, %rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
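	/*
	 * (%rsp) and 8(%rsp) hold src_err_ptr and dst_err_ptr as saved in
	 * the prologue; when the pointer is NULL the error store is
	 * skipped.
	 */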
.Lbad_source:
	movq	(%rsp), %rax
	testq	%rax, %rax
	jz	.Lende
	movl	$-EFAULT, (%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp), %rax
	testq	%rax, %rax
	jz	.Lende
	movl	$-EFAULT, (%rax)
	jmp	.Lende
ENDPROC(csum_partial_copy_generic)