Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $ |
2 | * | |
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX | |
4 | * operating system. INET is implemented using the BSD Socket | |
5 | * interface as the means of communication with the user level. | |
6 | * | |
7 | * IP/TCP/UDP checksumming routines | |
8 | * | |
9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | |
10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | |
11 | * Tom May, <ftom@netcom.com> | |
12 | * Pentium Pro/II routines: | |
13 | * Alexander Kjeldaas <astor@guardian.no> | |
14 | * Finn Arne Gangstad <finnag@guardian.no> | |
15 | * Lots of code moved from tcp.c and ip.c; see those files | |
16 | * for more names. | |
17 | * | |
18 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | |
19 | * handling. | |
20 | * Andi Kleen, add zeroing on error | |
21 | * converted to pure assembler | |
22 | * | |
23 | * SuperH version: Copyright (C) 1999 Niibe Yutaka | |
24 | * | |
25 | * This program is free software; you can redistribute it and/or | |
26 | * modify it under the terms of the GNU General Public License | |
27 | * as published by the Free Software Foundation; either version | |
28 | * 2 of the License, or (at your option) any later version. | |
29 | */ | |
30 | ||
31 | #include <asm/errno.h> | |
32 | #include <linux/linkage.h> | |
33 | ||
34 | /* | |
35 | * computes a partial checksum, e.g. for TCP/UDP fragments | |
36 | */ | |
37 | ||
38 | /* | |
cadc4e1a | 39 | * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); |
1da177e4 LT |
40 | */ |
41 | ||
42 | .text | |
! ----------------------------------------------------------------------
! asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum)
!   In:  r4 = buf, r5 = len (bytes), r6 = incoming 32-bit partial sum
!   Out: r0 = updated one's-complement partial checksum
! r7 keeps the original buf pointer so an odd start address can be
! detected again in the epilogue at label 9.  Branches spelled "bt/s" /
! "bf/s" execute the instruction after them (delay slot) before the
! branch takes effect; "addc" chains carries through the T bit.
! NOTE(review): the "|" columns and commit-id prefixes below are residue
! from the blame listing this file was captured from; they are preserved
! untouched.
! ----------------------------------------------------------------------
43 | ENTRY(csum_partial) | |
44 | /* | |
45 | * Experiments with Ethernet and SLIP connections show that buff | |
46 | * is aligned on either a 2-byte or 4-byte boundary. We get at | |
47 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | |
48 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | |
49 | * alignment for the unrolled loop. | |
50 | */ | |
1da177e4 | 51 | mov r4, r0 |
cadc4e1a SM |
52 | tst #3, r0 ! Check alignment. |
53 | bt/s 2f ! Jump if alignment is ok. |
54 | mov r4, r7 ! Keep a copy to check for alignment |
1da177e4 | 55 | ! |
cadc4e1a SM |
56 | tst #1, r0 ! Check alignment. |
57 | bt 21f ! Jump if alignment is boundary of 2bytes. |
58 | ||
59 | ! buf is odd |
60 | tst r5, r5 |
61 | add #-1, r5 |
62 | bt 9f |
63 | mov.b @r4+, r0 |
64 | extu.b r0, r0 |
65 | addc r0, r6 ! t=0 from previous tst |
! Rotate the sum left by 8 bits so the following halfword/word adds
! land in the right byte lanes; the rotation is undone at label 9.
66 | mov r6, r0 |
67 | shll8 r6 |
68 | shlr16 r0 |
69 | shlr8 r0 |
70 | or r0, r6 |
71 | mov r4, r0 |
72 | tst #2, r0 |
73 | bt 2f |
74 | 21: |
75 | ! buf is 2 byte aligned (len could be 0) |
1da177e4 LT |
76 | add #-2, r5 ! Alignment uses up two bytes. |
77 | cmp/pz r5 ! |
78 | bt/s 1f ! Jump if we had at least two bytes. |
79 | clrt |
80 | bra 6f |
81 | add #2, r5 ! r5 was < 2. Deal with it. |
82 | 1: |
1da177e4 LT |
83 | mov.w @r4+, r0 |
84 | extu.w r0, r0 |
85 | addc r0, r6 |
86 | bf 2f |
87 | add #1, r6 |
88 | 2: |
cadc4e1a SM |
89 | ! buf is 4 byte aligned (len could be 0) |
90 | mov r5, r1 |
1da177e4 | 91 | mov #-5, r0 |
cadc4e1a SM |
92 | shld r0, r1 |
93 | tst r1, r1 |
1da177e4 LT |
94 | bt/s 4f ! if it's =0, go to 4f |
95 | clrt |
! Unrolled main loop: 8 longwords (32 bytes) per pass; r1 = len/32
! counts passes; the T bit carries between addc's across iterations.
96 | .align 2 |
97 | 3: |
98 | mov.l @r4+, r0 |
99 | mov.l @r4+, r2 |
100 | mov.l @r4+, r3 |
101 | addc r0, r6 |
102 | mov.l @r4+, r0 |
103 | addc r2, r6 |
104 | mov.l @r4+, r2 |
105 | addc r3, r6 |
106 | mov.l @r4+, r3 |
107 | addc r0, r6 |
108 | mov.l @r4+, r0 |
109 | addc r2, r6 |
110 | mov.l @r4+, r2 |
111 | addc r3, r6 |
112 | addc r0, r6 |
113 | addc r2, r6 |
114 | movt r0 |
cadc4e1a | 115 | dt r1 |
1da177e4 LT |
116 | bf/s 3b |
117 | cmp/eq #1, r0 |
cadc4e1a SM |
118 | ! here, we know r1==0 |
119 | addc r1, r6 ! add carry to r6 |
1da177e4 | 120 | 4: |
cadc4e1a | 121 | mov r5, r0 |
1da177e4 LT |
122 | and #0x1c, r0 |
123 | tst r0, r0 |
cadc4e1a SM |
124 | bt 6f |
125 | ! 4 bytes or more remaining |
126 | mov r0, r1 |
127 | shlr2 r1 |
1da177e4 LT |
128 | mov #0, r2 |
! Word loop for the remaining 4..28 bytes (r1 = longword count).
129 | 5: |
130 | addc r2, r6 |
131 | mov.l @r4+, r2 |
132 | movt r0 |
cadc4e1a | 133 | dt r1 |
1da177e4 LT |
134 | bf/s 5b |
135 | cmp/eq #1, r0 |
136 | addc r2, r6 |
cadc4e1a | 137 | addc r1, r6 ! r1==0 here, so it means add carry-bit |
1da177e4 | 138 | 6: |
cadc4e1a | 139 | ! 3 bytes or less remaining |
1da177e4 LT |
140 | mov #3, r0 |
141 | and r0, r5 |
142 | tst r5, r5 |
143 | bt 9f ! if it's =0 go to 9f |
144 | mov #2, r1 |
145 | cmp/hs r1, r5 |
146 | bf 7f |
147 | mov.w @r4+, r0 |
148 | extu.w r0, r0 |
149 | cmp/eq r1, r5 |
150 | bt/s 8f |
151 | clrt |
152 | shll16 r0 |
153 | addc r0, r6 |
154 | 7: |
155 | mov.b @r4+, r0 |
156 | extu.b r0, r0 |
157 | #ifndef __LITTLE_ENDIAN__ |
158 | shll8 r0 |
159 | #endif |
160 | 8: |
161 | addc r0, r6 |
162 | mov #0, r0 |
cadc4e1a | 163 | addc r0, r6 |
1da177e4 | 164 | 9: |
cadc4e1a SM |
165 | ! Check if the buffer was misaligned, if so realign sum |
166 | mov r7, r0 |
167 | tst #1, r0 |
168 | bt 10f |
! Undo the 8-bit rotation applied in the odd-start prologue; a byte
! rotation of a one's-complement sum is checksum-preserving.
169 | mov r6, r0 |
170 | shll8 r6 |
171 | shlr16 r0 |
172 | shlr8 r0 |
173 | or r0, r6 |
174 | 10: |
1da177e4 LT |
175 | rts |
176 | mov r6, r0 |
177 | ||
178 | /* | |
179 | unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, | |
180 | int sum, int *src_err_ptr, int *dst_err_ptr) | |
181 | */ | |
182 | ||
183 | /* | |
184 | * Copy from ds while checksumming, otherwise like csum_partial | |
185 | * | |
186 | * The macros SRC and DST specify the type of access for the instruction. | |
187 | * thus we can call a custom exception handler for all access types. | |
188 | * | |
189 | * FIXME: could someone double-check whether I haven't mixed up some SRC and | |
190 | * DST definitions? It's damn hard to trigger all cases. I hope I got | |
191 | * them all but there's no guarantee. | |
192 | */ | |
193 | ||
! SRC(insn) / DST(insn): emit insn at local label 9999 and record the
! pair (faulting-insn address, fixup address) in the __ex_table section
! so the kernel fault handler can redirect a faulting access.
! A fault in a SRC() access resumes at fixup label 6001 (source error);
! a fault in a DST() access resumes at 6002 (destination error) — both
! fixups are defined in the .fixup section further down this file.
194 | #define SRC(...) \ | |
195 | 9999: __VA_ARGS__ ; \ | |
196 | .section __ex_table, "a"; \ | |
197 | .long 9999b, 6001f ; \ | |
198 | .previous | |
199 | ||
200 | #define DST(...) \ | |
201 | 9999: __VA_ARGS__ ; \ | |
202 | .section __ex_table, "a"; \ | |
203 | .long 9999b, 6002f ; \ | |
204 | .previous | |
205 | ||
206 | ! | |
207 | ! r4: const char *SRC | |
208 | ! r5: char *DST | |
209 | ! r6: int LEN | |
210 | ! r7: int SUM | |
211 | ! | |
212 | ! on stack: | |
213 | ! int *SRC_ERR_PTR | |
214 | ! int *DST_ERR_PTR | |
215 | ! | |
! ----------------------------------------------------------------------
! unsigned int csum_partial_copy_generic(const char *src, char *dst,
!         int len, int sum, int *src_err_ptr, int *dst_err_ptr)
!   In:  r4 = src, r5 = dst, r6 = len, r7 = sum;
!        src_err_ptr / dst_err_ptr are passed on the caller's stack.
!   Out: r0 = checksum of the bytes copied.
! Copies len bytes from src to dst while folding them into the sum.
! Every user access is wrapped in SRC()/DST(), which record the insn in
! __ex_table: a faulting load lands in fixup 6001 (writes -EFAULT via
! src_err_ptr and zeroes the whole destination), a faulting store in
! 6002 (writes -EFAULT via dst_err_ptr); both jump back to 5000 for the
! common exit.  r5/r6 are pushed at entry (8 bytes, so the stack args
! sit at @(8,r15)/@(12,r15)) and the stack is popped before rts.
! NOTE(review): "|" columns and commit-id prefixes are residue from the
! blame listing this file was captured from; preserved untouched.
! ----------------------------------------------------------------------
216 | ENTRY(csum_partial_copy_generic) | |
217 | mov.l r5,@-r15 | |
218 | mov.l r6,@-r15 | |
219 | ||
220 | mov #3,r0 ! Check src and dest are equally aligned | |
221 | mov r4,r1 | |
222 | and r0,r1 | |
223 | and r5,r0 | |
224 | cmp/eq r1,r0 | |
225 | bf 3f ! Different alignments, use slow version | |
226 | tst #1,r0 ! Check dest word aligned | |
227 | bf 3f ! If not, do it the slow way | |
228 | ||
229 | mov #2,r0 | |
230 | tst r0,r5 ! Check dest alignment. | |
231 | bt 2f ! Jump if alignment is ok. | |
232 | add #-2,r6 ! Alignment uses up two bytes. | |
233 | cmp/pz r6 ! Jump if we had at least two bytes. | |
234 | bt/s 1f | |
235 | clrt | |
24ab54cb | 236 | add #2,r6 ! r6 was < 2. Deal with it. |
1da177e4 | 237 | bra 4f |
24ab54cb | 238 | mov r6,r2 |
1da177e4 LT |
239 | |
240 | 3: ! Handle different src and dest alignments. | |
241 | ! This is not common, so simple byte by byte copy will do. | |
242 | mov r6,r2 | |
243 | shlr r6 | |
244 | tst r6,r6 | |
245 | bt 4f | |
246 | clrt | |
247 | .align 2 | |
248 | 5: | |
249 | SRC( mov.b @r4+,r1 ) | |
250 | SRC( mov.b @r4+,r0 ) | |
251 | extu.b r1,r1 | |
252 | DST( mov.b r1,@r5 ) | |
253 | DST( mov.b r0,@(1,r5) ) | |
254 | extu.b r0,r0 | |
255 | add #2,r5 | |
256 | ||
257 | #ifdef __LITTLE_ENDIAN__ | |
258 | shll8 r0 | |
259 | #else | |
260 | shll8 r1 | |
261 | #endif | |
262 | or r1,r0 | |
263 | ||
264 | addc r0,r7 | |
265 | movt r0 | |
266 | dt r6 | |
267 | bf/s 5b | |
268 | cmp/eq #1,r0 | |
269 | mov #0,r0 | |
270 | addc r0, r7 | |
271 | ||
272 | mov r2, r0 | |
273 | tst #1, r0 | |
274 | bt 7f | |
275 | bra 5f | |
276 | clrt | |
277 | ||
278 | ! src and dest equally aligned, but to a two byte boundary. | |
279 | ! Handle first two bytes as a special case | |
280 | .align 2 | |
281 | 1: | |
282 | SRC( mov.w @r4+,r0 ) | |
283 | DST( mov.w r0,@r5 ) | |
284 | add #2,r5 | |
285 | extu.w r0,r0 | |
286 | addc r0,r7 | |
287 | mov #0,r0 | |
288 | addc r0,r7 | |
289 | 2: | |
290 | mov r6,r2 | |
291 | mov #-5,r0 | |
292 | shld r0,r6 | |
293 | tst r6,r6 | |
294 | bt/s 2f | |
295 | clrt | |
! Aligned fast path: copy+sum 32 bytes per iteration (r6 = len/32,
! original len saved in r2; T bit carries between the addc's).
296 | .align 2 | |
297 | 1: | |
298 | SRC( mov.l @r4+,r0 ) | |
299 | SRC( mov.l @r4+,r1 ) | |
300 | addc r0,r7 | |
301 | DST( mov.l r0,@r5 ) | |
302 | DST( mov.l r1,@(4,r5) ) | |
303 | addc r1,r7 | |
304 | ||
305 | SRC( mov.l @r4+,r0 ) | |
306 | SRC( mov.l @r4+,r1 ) | |
307 | addc r0,r7 | |
308 | DST( mov.l r0,@(8,r5) ) | |
309 | DST( mov.l r1,@(12,r5) ) | |
310 | addc r1,r7 | |
311 | ||
312 | SRC( mov.l @r4+,r0 ) | |
313 | SRC( mov.l @r4+,r1 ) | |
314 | addc r0,r7 | |
315 | DST( mov.l r0,@(16,r5) ) | |
316 | DST( mov.l r1,@(20,r5) ) | |
317 | addc r1,r7 | |
318 | ||
319 | SRC( mov.l @r4+,r0 ) | |
320 | SRC( mov.l @r4+,r1 ) | |
321 | addc r0,r7 | |
322 | DST( mov.l r0,@(24,r5) ) | |
323 | DST( mov.l r1,@(28,r5) ) | |
324 | addc r1,r7 | |
325 | add #32,r5 | |
326 | movt r0 | |
327 | dt r6 | |
328 | bf/s 1b | |
329 | cmp/eq #1,r0 | |
330 | mov #0,r0 | |
331 | addc r0,r7 | |
332 | ||
333 | 2: mov r2,r6 | |
334 | mov #0x1c,r0 | |
335 | and r0,r6 | |
336 | cmp/pl r6 | |
337 | bf/s 4f | |
338 | clrt | |
339 | shlr2 r6 | |
! Copy+sum the remaining longwords (r6 = longword count, 1..7).
340 | 3: | |
341 | SRC( mov.l @r4+,r0 ) | |
342 | addc r0,r7 | |
343 | DST( mov.l r0,@r5 ) | |
344 | add #4,r5 | |
345 | movt r0 | |
346 | dt r6 | |
347 | bf/s 3b | |
348 | cmp/eq #1,r0 | |
349 | mov #0,r0 | |
350 | addc r0,r7 | |
351 | 4: mov r2,r6 | |
352 | mov #3,r0 | |
353 | and r0,r6 | |
354 | cmp/pl r6 | |
355 | bf 7f | |
356 | mov #2,r1 | |
357 | cmp/hs r1,r6 | |
358 | bf 5f | |
359 | SRC( mov.w @r4+,r0 ) | |
360 | DST( mov.w r0,@r5 ) | |
361 | extu.w r0,r0 | |
362 | add #2,r5 | |
363 | cmp/eq r1,r6 | |
364 | bt/s 6f | |
365 | clrt | |
366 | shll16 r0 | |
367 | addc r0,r7 | |
368 | 5: | |
369 | SRC( mov.b @r4+,r0 ) | |
370 | DST( mov.b r0,@r5 ) | |
371 | extu.b r0,r0 | |
372 | #ifndef __LITTLE_ENDIAN__ | |
373 | shll8 r0 | |
374 | #endif | |
375 | 6: addc r0,r7 | |
376 | mov #0,r0 | |
377 | addc r0,r7 | |
! 5000: common exit; the .fixup handlers jump back here after a fault.
378 | 7: | |
379 | 5000: | |
380 | ||
381 | # Exception handler: | |
382 | .section .fixup, "ax" | |
383 | ||
384 | 6001: | |
385 | mov.l @(8,r15),r0 ! src_err_ptr | |
386 | mov #-EFAULT,r1 | |
387 | mov.l r1,@r0 | |
388 | ||
389 | ! zero the complete destination - computing the rest | |
390 | ! is too much work | |
391 | mov.l @(4,r15),r5 ! dst | |
392 | mov.l @r15,r6 ! len | |
393 | mov #0,r7 | |
394 | 1: mov.b r7,@r5 | |
395 | dt r6 | |
396 | bf/s 1b | |
397 | add #1,r5 | |
398 | mov.l 8000f,r0 | |
399 | jmp @r0 | |
400 | nop | |
401 | .align 2 | |
402 | 8000: .long 5000b | |
403 | ||
404 | 6002: | |
405 | mov.l @(12,r15),r0 ! dst_err_ptr | |
406 | mov #-EFAULT,r1 | |
407 | mov.l r1,@r0 | |
408 | mov.l 8001f,r0 | |
409 | jmp @r0 | |
410 | nop | |
411 | .align 2 | |
412 | 8001: .long 5000b | |
413 | ||
414 | .previous | |
415 | add #8,r15 | |
416 | rts | |
417 | mov r7,r0 |