Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/csumpartialcopygeneric.S | |
3 | * | |
4 | * Copyright (C) 1995-2001 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
6ebbf2ce | 10 | #include <asm/assembler.h> |
4dd1837d | 11 | #include <asm/export.h> |
1da177e4 LT |
12 | |
13 | /* | |
14 | * unsigned int | |
15 | * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ...) | |
16 | * r0 = src, r1 = dst, r2 = len, r3 = sum | |
17 | * Returns : r0 = checksum | |
18 | * | |
19 | * Note that 'tst' and 'teq' preserve the carry flag. | |
20 | */ | |
21 | ||
/*
 * Symbolic register names for the four arguments, following the
 * r0-r3 assignment listed in the header comment above. sum is also
 * the running checksum accumulator for the whole routine.
 */
22 | src .req r0 | |
23 | dst .req r1 | |
24 | len .req r2 | |
25 | sum .req r3 | |
26 | ||
/*
 * Zero-length exit, reached from .Lless8 when len is 0: the incoming
 * sum is handed back unchanged in r0.
 * NOTE(review): load_regs is a macro provided outside this file. It
 * is presumably the counterpart of save_regs and returns to the
 * caller, since the code that follows is a separate subroutine and
 * not a continuation - confirm against the macro definition.
 */
8adbb371 | 27 | .Lzero: mov r0, sum |
90303b10 | 28 | load_regs |
1da177e4 LT |
29 | |
30 | /* | |
31 | * Align an unaligned destination pointer. We know that | |
32 | * we have >= 8 bytes here, so we don't need to check | |
33 | * the length. Note that the source pointer hasn't been | |
34 | * aligned yet. | |
35 | */ | |
8adbb371 NP |
/*
 * Subroutine: entered with blne from the main entry and left with
 * reteq lr or ret lr. It is only called when dst has bit 0 or bit 1
 * set, and copies 1, 2 or 3 leading bytes so that dst ends up word
 * aligned. len shrinks by the number of bytes moved and every byte is
 * folded into sum with adcs, so the carry flag stays live across the
 * whole sequence and on return to the caller.
 * Scratch registers: ip and r8.
 * NOTE(review): load1b, load2b, put_byte_0 and put_byte_1 are macros
 * defined outside this file. Presumably the loads fetch bytes from
 * src with post-increment and the put_byte operands pick the byte
 * lane inside the checksum word - confirm against the including file.
 */
36 | .Ldst_unaligned: |
37 | tst dst, #1 | |
38 | beq .Ldst_16bit | |
1da177e4 LT |
39 | |
/* dst is odd: move a single byte so that dst becomes even */
40 | load1b ip | |
41 | sub len, len, #1 | |
42 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
43 | strb ip, [dst], #1 | |
44 | tst dst, #2 | |
6ebbf2ce | 45 | reteq lr @ dst is now 32bit aligned |
1da177e4 | 46 | |
/* dst is even but bit 1 is set: move two more bytes */
8adbb371 | 47 | .Ldst_16bit: load2b r8, ip |
1da177e4 LT |
48 | sub len, len, #2 |
49 | adcs sum, sum, r8, put_byte_0 | |
50 | strb r8, [dst], #1 | |
51 | adcs sum, sum, ip, put_byte_1 | |
52 | strb ip, [dst], #1 | |
6ebbf2ce | 53 | ret lr @ dst is now 32bit aligned |
1da177e4 LT |
54 | |
55 | /* | |
56 | * Handle 0 to 7 bytes, with any alignment of source and | |
57 | * destination pointers. Note that when we get here, C = 0 | |
58 | */ | |
8adbb371 NP |
/*
 * Short-copy path, entered by the blo in the main entry when len is
 * below 8. Data is moved one byte or one byte pair at a time:
 *   - len equal to 0 leaves through .Lzero
 *   - an odd dst is first evened out with a single byte copy
 *   - the loop at label 1 moves two bytes per pass for as long as
 *     len has bit 1 or bit 2 set
 *   - a final odd byte, if any, is copied before joining .Ldone
 * Every byte is added into sum with adcs. The tst and teq
 * instructions in between leave the carry untouched, as noted in the
 * file header. Scratch registers: ip and r8.
 * NOTE(review): load1b, load2b and the put_byte operands are macros
 * defined outside this file - confirm their exact behaviour there.
 */
59 | .Lless8: teq len, #0 @ check for zero count |
60 | beq .Lzero | |
1da177e4 LT |
61 | |
62 | /* we must have at least one byte. */ | |
63 | tst dst, #1 @ dst 16-bit aligned | |
8adbb371 | 64 | beq .Lless8_aligned |
1da177e4 LT |
65 | |
66 | /* Align dst */ | |
67 | load1b ip | |
68 | sub len, len, #1 | |
69 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
70 | strb ip, [dst], #1 | |
/* skip the pair loop when fewer than two bytes are left */
71 | tst len, #6 | |
8adbb371 | 72 | beq .Lless8_byteonly |
1da177e4 LT |
73 | |
74 | 1: load2b r8, ip | |
75 | sub len, len, #2 | |
76 | adcs sum, sum, r8, put_byte_0 | |
77 | strb r8, [dst], #1 | |
78 | adcs sum, sum, ip, put_byte_1 | |
79 | strb ip, [dst], #1 | |
8adbb371 NP |
80 | .Lless8_aligned: |
81 | tst len, #6 | |
1da177e4 | 82 | bne 1b |
8adbb371 | 83 | .Lless8_byteonly: |
/* at most one byte remains at this point */
1da177e4 | 84 | tst len, #1 |
8adbb371 | 85 | beq .Ldone |
1da177e4 LT |
86 | load1b r8 |
87 | adcs sum, sum, r8, put_byte_0 @ update checksum | |
88 | strb r8, [dst], #1 | |
8adbb371 | 89 | b .Ldone |
1da177e4 LT |
90 | |
/*
 * Main entry point and the path for a word-aligned source.
 *
 * Flow:
 *   1. len below 8 is handed to .Lless8.
 *   2. The carry is cleared and, if dst is not word aligned, the
 *      .Ldst_unaligned subroutine is called to fix that.
 *   3. A source that is not word aligned branches to
 *      .Lsrc_not_aligned.
 *   4. Otherwise data is moved in 16-byte groups, then an optional
 *      8-byte and 4-byte piece, then 1 to 3 trailing bytes. The code
 *      runs through .Lexit into whatever follows this block.
 *
 * Words are accumulated with a chain of adcs instructions, so the
 * carry flag is live from step 2 onward.
 * Scratch registers: r4 to r7 and ip here (r8 in the helpers).
 * NOTE(review): FN_ENTRY, save_regs and the loadNl, get_byte, put_byte
 * and lspush macros are defined outside this file. Presumably
 * FN_ENTRY emits the function label and save_regs pushes the
 * registers that load_regs later restores - confirm.
 */
91 | FN_ENTRY | |
1da177e4 | 92 | save_regs |
1da177e4 LT |
93 | |
94 | cmp len, #8 @ Ensure that we have at least | |
8adbb371 | 95 | blo .Lless8 @ 8 bytes to copy. |
1da177e4 LT |
96 | |
97 | adds sum, sum, #0 @ C = 0 | |
98 | tst dst, #3 @ Test destination alignment | |
8adbb371 | 99 | blne .Ldst_unaligned @ align destination, return here |
1da177e4 LT |
100 | |
101 | /* | |
102 | * Ok, the dst pointer is now 32bit aligned, and we know | |
103 | * that we must have more than 4 bytes to copy. Note | |
104 | * that C contains the carry from the dst alignment above. | |
105 | */ | |
106 | ||
107 | tst src, #3 @ Test source alignment | |
8adbb371 | 108 | bne .Lsrc_not_aligned |
1da177e4 LT |
109 | |
110 | /* Routine for src & dst aligned */ | |
111 | ||
/* ip = len with the low four bits cleared: bytes to move in 16-byte groups */
112 | bics ip, len, #15 | |
113 | beq 2f | |
114 | ||
115 | 1: load4l r4, r5, r6, r7 | |
116 | stmia dst!, {r4, r5, r6, r7} | |
117 | adcs sum, sum, r4 | |
118 | adcs sum, sum, r5 | |
119 | adcs sum, sum, r6 | |
120 | adcs sum, sum, r7 | |
/*
 * The counter is stepped with a plain sub and tested with teq, so
 * the carry produced by the adcs chain survives into the next pass.
 */
121 | sub ip, ip, #16 | |
122 | teq ip, #0 | |
123 | bne 1b | |
124 | ||
/* ip = len masked with 12: bit 3 selects an 8-byte piece, bit 2 a 4-byte piece */
125 | 2: ands ip, len, #12 | |
126 | beq 4f | |
127 | tst ip, #8 | |
128 | beq 3f | |
129 | load2l r4, r5 | |
130 | stmia dst!, {r4, r5} | |
131 | adcs sum, sum, r4 | |
132 | adcs sum, sum, r5 | |
133 | tst ip, #4 | |
134 | beq 4f | |
135 | ||
136 | 3: load1l r4 | |
137 | str r4, [dst], #4 | |
138 | adcs sum, sum, r4 | |
139 | ||
/*
 * Trailing 1 to 3 bytes. They are picked out of r4 with the get_byte
 * operands and stored one at a time, so no more than len bytes are
 * written.
 * NOTE(review): load1l presumably fetches a whole word, which would
 * read up to three bytes past the end of the source, and lspush 16
 * presumably keeps only the two bytes being copied - confirm both
 * against the macro definitions.
 */
140 | 4: ands len, len, #3 | |
8adbb371 | 141 | beq .Ldone |
1da177e4 LT |
142 | load1l r4 |
143 | tst len, #2 | |
144 | mov r5, r4, get_byte_0 | |
8adbb371 | 145 | beq .Lexit |
d98b90ea | 146 | adcs sum, sum, r4, lspush #16 |
1da177e4 LT |
147 | strb r5, [dst], #1 |
148 | mov r5, r4, get_byte_1 | |
149 | strb r5, [dst], #1 | |
150 | mov r5, r4, get_byte_2 | |
/*
 * .Lexit is also the join point for the tails of the unaligned-source
 * paths. On arrival r5 holds the last byte, which is stored, masked
 * to 8 bits and summed only when len has bit 0 set.
 */
8adbb371 | 151 | .Lexit: tst len, #1 |
1da177e4 LT |
152 | strneb r5, [dst], #1 |
153 | andne r5, r5, #255 | |
154 | adcnes sum, sum, r5, put_byte_0 | |
155 | ||
156 | /* | |
157 | * If the dst pointer was not 16-bit aligned, we | |
158 | * need to rotate the checksum here to get around | |
159 | * the inefficient byte manipulations in the | |
160 | * architecture independent code. | |
161 | */ | |
/*
 * Common exit for every path except .Lzero. The outstanding carry is
 * folded into the result, which is placed in r0. The sum register is
 * no longer needed after that and is reused to reload the dst value
 * that, per the inline note, sits at offset 0 from sp. If that value
 * is odd, the result is rotated right by 8 bits, for the reason given
 * in the comment above.
 * NOTE(review): the stacked dst is presumably the caller-supplied
 * pointer pushed by save_regs, and load_regs presumably restores the
 * saved registers and returns - both macros live outside this file,
 * confirm there.
 */
8adbb371 | 162 | .Ldone: adc r0, sum, #0 |
1da177e4 LT |
163 | ldr sum, [sp, #0] @ dst |
164 | tst sum, #1 | |
165 | movne r0, r0, ror #8 | |
90303b10 | 166 | load_regs |
1da177e4 | 167 | |
/*
 * Source is not word aligned; this label is only reached after the
 * main entry has finished aligning dst.
 *
 * The carry left by the dst alignment is folded into sum, the byte
 * offset of src within its word is kept in ip, src is rounded down
 * to a word boundary and the first word is fetched into r5. Offsets
 * 2 and 3 branch to their own variants. The fall-through below is
 * offset 1, because offset 0 never reaches this label.
 *
 * In the loops, r4 carries the leftover part of the previous source
 * word (pulled by 8). It is merged with the leading part of the next
 * word (pushed by 24) to form each output word, which is stored and
 * then summed with adcs. The counter uses sub plus teq so the carry
 * is preserved between passes.
 * The cmp of an offset of 1 against 2 leaves the carry clear, which
 * is presumably what the inline C = 0 note refers to.
 * Scratch registers: r4 to r8 and ip.
 * NOTE(review): lspull, lspush, get_byte and the loadNl macros are
 * defined outside this file. The shifts are presumably endian
 * dependent - confirm.
 */
8adbb371 | 168 | .Lsrc_not_aligned: |
1da177e4 LT |
169 | adc sum, sum, #0 @ include C from dst alignment |
170 | and ip, src, #3 | |
171 | bic src, src, #3 | |
172 | load1l r5 | |
173 | cmp ip, #2 | |
8adbb371 NP |
174 | beq .Lsrc2_aligned |
175 | bhi .Lsrc3_aligned | |
d98b90ea | 176 | mov r4, r5, lspull #8 @ C = 0 |
1da177e4 LT |
177 | bics ip, len, #15 |
178 | beq 2f | |
/* 16 bytes per pass */
179 | 1: load4l r5, r6, r7, r8 | |
d98b90ea VK |
180 | orr r4, r4, r5, lspush #24 |
181 | mov r5, r5, lspull #8 | |
182 | orr r5, r5, r6, lspush #24 | |
183 | mov r6, r6, lspull #8 | |
184 | orr r6, r6, r7, lspush #24 | |
185 | mov r7, r7, lspull #8 | |
186 | orr r7, r7, r8, lspush #24 | |
1da177e4 LT |
187 | stmia dst!, {r4, r5, r6, r7} |
188 | adcs sum, sum, r4 | |
189 | adcs sum, sum, r5 | |
190 | adcs sum, sum, r6 | |
191 | adcs sum, sum, r7 | |
/* keep the unused part of the last word for the next output word */
d98b90ea | 192 | mov r4, r8, lspull #8 |
1da177e4 LT |
193 | sub ip, ip, #16 |
194 | teq ip, #0 | |
195 | bne 1b | |
/* optional 8-byte piece, then optional 4-byte piece */
196 | 2: ands ip, len, #12 | |
197 | beq 4f | |
198 | tst ip, #8 | |
199 | beq 3f | |
200 | load2l r5, r6 | |
d98b90ea VK |
201 | orr r4, r4, r5, lspush #24 |
202 | mov r5, r5, lspull #8 | |
203 | orr r5, r5, r6, lspush #24 | |
1da177e4 LT |
204 | stmia dst!, {r4, r5} |
205 | adcs sum, sum, r4 | |
206 | adcs sum, sum, r5 | |
d98b90ea | 207 | mov r4, r6, lspull #8 |
1da177e4 LT |
208 | tst ip, #4 |
209 | beq 4f | |
210 | 3: load1l r5 | |
d98b90ea | 211 | orr r4, r4, r5, lspush #24 |
1da177e4 LT |
212 | str r4, [dst], #4 |
213 | adcs sum, sum, r4 | |
d98b90ea | 214 | mov r4, r5, lspull #8 |
/*
 * Trailing 1 to 3 bytes are taken from the leftover already held in
 * r4, with no further load. The last byte is handed to .Lexit in r5.
 */
1da177e4 | 215 | 4: ands len, len, #3 |
8adbb371 | 216 | beq .Ldone |
1da177e4 LT |
217 | mov r5, r4, get_byte_0 |
218 | tst len, #2 | |
8adbb371 | 219 | beq .Lexit |
d98b90ea | 220 | adcs sum, sum, r4, lspush #16 |
1da177e4 LT |
221 | strb r5, [dst], #1 |
222 | mov r5, r4, get_byte_1 | |
223 | strb r5, [dst], #1 | |
224 | mov r5, r4, get_byte_2 | |
8adbb371 | 225 | b .Lexit |
1da177e4 | 226 | |
/*
 * Variant of the unaligned-source copy for a source offset of 2
 * within its word, reached by the beq in .Lsrc_not_aligned with the
 * first source word already in r5.
 *
 * Same structure as the offset 1 code, but both the pull and the
 * push shift are 16. The adds of 0 into sum clears the carry, which
 * the equal compare in .Lsrc_not_aligned left set.
 *
 * Tail: when two or three bytes remain, the leftover in r4 is summed
 * whole and its first two bytes are stored. A third byte, if needed,
 * is fetched with load1b and passed to .Lexit in r5. When only one
 * byte remains it is taken from r4 directly.
 * Scratch registers: r4 to r8 and ip.
 * NOTE(review): lspull, lspush, get_byte and the load macros are
 * defined outside this file - confirm their behaviour there.
 */
d98b90ea | 227 | .Lsrc2_aligned: mov r4, r5, lspull #16 |
1da177e4 LT |
228 | adds sum, sum, #0 |
229 | bics ip, len, #15 | |
230 | beq 2f | |
/* 16 bytes per pass */
231 | 1: load4l r5, r6, r7, r8 | |
d98b90ea VK |
232 | orr r4, r4, r5, lspush #16 |
233 | mov r5, r5, lspull #16 | |
234 | orr r5, r5, r6, lspush #16 | |
235 | mov r6, r6, lspull #16 | |
236 | orr r6, r6, r7, lspush #16 | |
237 | mov r7, r7, lspull #16 | |
238 | orr r7, r7, r8, lspush #16 | |
1da177e4 LT |
239 | stmia dst!, {r4, r5, r6, r7} |
240 | adcs sum, sum, r4 | |
241 | adcs sum, sum, r5 | |
242 | adcs sum, sum, r6 | |
243 | adcs sum, sum, r7 | |
d98b90ea | 244 | mov r4, r8, lspull #16 |
1da177e4 LT |
245 | sub ip, ip, #16 |
246 | teq ip, #0 | |
247 | bne 1b | |
/* optional 8-byte piece, then optional 4-byte piece */
248 | 2: ands ip, len, #12 | |
249 | beq 4f | |
250 | tst ip, #8 | |
251 | beq 3f | |
252 | load2l r5, r6 | |
d98b90ea VK |
253 | orr r4, r4, r5, lspush #16 |
254 | mov r5, r5, lspull #16 | |
255 | orr r5, r5, r6, lspush #16 | |
1da177e4 LT |
256 | stmia dst!, {r4, r5} |
257 | adcs sum, sum, r4 | |
258 | adcs sum, sum, r5 | |
d98b90ea | 259 | mov r4, r6, lspull #16 |
1da177e4 LT |
260 | tst ip, #4 |
261 | beq 4f | |
262 | 3: load1l r5 | |
d98b90ea | 263 | orr r4, r4, r5, lspush #16 |
1da177e4 LT |
264 | str r4, [dst], #4 |
265 | adcs sum, sum, r4 | |
d98b90ea | 266 | mov r4, r5, lspull #16 |
/* trailing 1 to 3 bytes */
1da177e4 | 267 | 4: ands len, len, #3 |
8adbb371 | 268 | beq .Ldone |
1da177e4 LT |
269 | mov r5, r4, get_byte_0 |
270 | tst len, #2 | |
8adbb371 | 271 | beq .Lexit |
1da177e4 LT |
272 | adcs sum, sum, r4 |
273 | strb r5, [dst], #1 | |
274 | mov r5, r4, get_byte_1 | |
275 | strb r5, [dst], #1 | |
276 | tst len, #1 | |
8adbb371 | 277 | beq .Ldone |
/* third byte is not in r4 any more, so it is read from the source */
1da177e4 | 278 | load1b r5 |
8adbb371 | 279 | b .Lexit |
1da177e4 | 280 | |
/*
 * Variant of the unaligned-source copy for a source offset of 3
 * within its word, reached by the bhi in .Lsrc_not_aligned with the
 * first source word already in r5.
 *
 * Same structure as the other two variants, with a pull shift of 24
 * and a push shift of 8. The adds of 0 into sum clears the carry,
 * which the compare in .Lsrc_not_aligned left set for this case.
 *
 * Tail: when two or three bytes remain, the first one is stored from
 * the leftover in r4, which is then summed whole. A further word is
 * fetched with load1l to supply the second byte and, through r5 and
 * .Lexit, the optional third. When only one byte remains it is taken
 * from r4 directly.
 * Scratch registers: r4 to r8 and ip.
 * NOTE(review): lspull, lspush, get_byte and the load macros, as well
 * as the closing FN_EXIT and FN_EXPORT, are defined outside this
 * file. The last two presumably end the function opened by FN_ENTRY
 * and export its symbol - confirm against the including file.
 */
d98b90ea | 281 | .Lsrc3_aligned: mov r4, r5, lspull #24 |
1da177e4 LT |
282 | adds sum, sum, #0 |
283 | bics ip, len, #15 | |
284 | beq 2f | |
/* 16 bytes per pass */
285 | 1: load4l r5, r6, r7, r8 | |
d98b90ea VK |
286 | orr r4, r4, r5, lspush #8 |
287 | mov r5, r5, lspull #24 | |
288 | orr r5, r5, r6, lspush #8 | |
289 | mov r6, r6, lspull #24 | |
290 | orr r6, r6, r7, lspush #8 | |
291 | mov r7, r7, lspull #24 | |
292 | orr r7, r7, r8, lspush #8 | |
1da177e4 LT |
293 | stmia dst!, {r4, r5, r6, r7} |
294 | adcs sum, sum, r4 | |
295 | adcs sum, sum, r5 | |
296 | adcs sum, sum, r6 | |
297 | adcs sum, sum, r7 | |
d98b90ea | 298 | mov r4, r8, lspull #24 |
1da177e4 LT |
299 | sub ip, ip, #16 |
300 | teq ip, #0 | |
301 | bne 1b | |
/* optional 8-byte piece, then optional 4-byte piece */
302 | 2: ands ip, len, #12 | |
303 | beq 4f | |
304 | tst ip, #8 | |
305 | beq 3f | |
306 | load2l r5, r6 | |
d98b90ea VK |
307 | orr r4, r4, r5, lspush #8 |
308 | mov r5, r5, lspull #24 | |
309 | orr r5, r5, r6, lspush #8 | |
1da177e4 LT |
310 | stmia dst!, {r4, r5} |
311 | adcs sum, sum, r4 | |
312 | adcs sum, sum, r5 | |
d98b90ea | 313 | mov r4, r6, lspull #24 |
1da177e4 LT |
314 | tst ip, #4 |
315 | beq 4f | |
316 | 3: load1l r5 | |
d98b90ea | 317 | orr r4, r4, r5, lspush #8 |
1da177e4 LT |
318 | str r4, [dst], #4 |
319 | adcs sum, sum, r4 | |
d98b90ea | 320 | mov r4, r5, lspull #24 |
/* trailing 1 to 3 bytes */
1da177e4 | 321 | 4: ands len, len, #3 |
8adbb371 | 322 | beq .Ldone |
1da177e4 LT |
323 | mov r5, r4, get_byte_0 |
324 | tst len, #2 | |
8adbb371 | 325 | beq .Lexit |
1da177e4 LT |
326 | strb r5, [dst], #1 |
327 | adcs sum, sum, r4 | |
/* the remaining bytes come from the next source word */
328 | load1l r4 | |
329 | mov r5, r4, get_byte_0 | |
330 | strb r5, [dst], #1 | |
d98b90ea | 331 | adcs sum, sum, r4, lspush #24 |
1da177e4 | 332 | mov r5, r4, get_byte_1 |
8adbb371 | 333 | b .Lexit |
93ed3970 | 334 | FN_EXIT |
4dd1837d | 335 | FN_EXPORT |