Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/csumpartialcopygeneric.S | |
3 | * | |
4 | * Copyright (C) 1995-2001 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
10 | ||
11 | /* | |
12 | * unsigned int | |
13 | * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) | |
14 | * r0 = src, r1 = dst, r2 = len, r3 = sum | |
15 | * Returns : r0 = checksum | |
16 | * | |
17 | * Note that 'tst' and 'teq' preserve the carry flag. | |
18 | */ | |
19 | ||
/*
 * Symbolic names for the four argument registers listed in the
 * header comment: src = r0, dst = r1, len = r2, sum = r3.
 */
20 | src .req r0 | |
21 | dst .req r1 | |
22 | len .req r2 | |
23 | sum .req r3 | |
24 | ||
/*
 * Zero-length exit, reached from .Lless8 when len == 0: the incoming
 * sum is handed back unchanged in r0.
 * NOTE(review): load_regs is a macro that is not defined in this file.
 * It is presumably the counterpart of save_regs and returns to the
 * caller, since nothing else here stops execution from running on
 * into .Ldst_unaligned -- confirm against its definition.
 */
8adbb371 | 25 | .Lzero: mov r0, sum
90303b10 | 26 | load_regs
1da177e4 LT |
27 | |
28 | /* | |
29 | * Align an unaligned destination pointer. We know that | |
30 | * we have >= 8 bytes here, so we don't need to check | |
31 | * the length. Note that the source pointer hasn't been | |
32 | * aligned yet. | |
33 | */ | |
/*
 * Subroutine: entered with blne from the function entry and left
 * through lr. Copies one, two or three bytes so that dst ends up on a
 * 32-bit boundary, adding each byte to sum on the way.
 * len is adjusted with sub (not subs) and dst is tested with tst, so
 * the carry produced by one adcs is still intact for the next.
 * NOTE(review): load1b, load2b, put_byte_0 and put_byte_1 are macros
 * defined outside this file. Presumably load1b/load2b fetch one/two
 * bytes from src and put_byte_N selects the byte lane used when the
 * value is added -- confirm against their definitions.
 */
8adbb371 NP |
34 | .Ldst_unaligned:
35 | tst dst, #1 | |
36 | beq .Ldst_16bit | |
1da177e4 LT |
37 | |
/* dst is odd: move a single byte first. */
38 | load1b ip | |
39 | sub len, len, #1 | |
40 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
41 | strb ip, [dst], #1 | |
/* If bit 1 of the advanced dst is clear, dst is a multiple of 4: return. */
42 | tst dst, #2 | |
43 | moveq pc, lr @ dst is now 32bit aligned | |
44 | ||
/* dst is 16-bit but not 32-bit aligned: move two more bytes, then return. */
8adbb371 | 45 | .Ldst_16bit: load2b r8, ip
1da177e4 LT |
46 | sub len, len, #2 |
47 | adcs sum, sum, r8, put_byte_0 | |
48 | strb r8, [dst], #1 | |
49 | adcs sum, sum, ip, put_byte_1 | |
50 | strb ip, [dst], #1 | |
51 | mov pc, lr @ dst is now 32bit aligned | |
52 | ||
53 | /* | |
54 | * Handle 0 to 7 bytes, with any alignment of source and | |
55 | * destination pointers. Note that when we get here, C = 0 | |
56 | */ | |
/*
 * Short path, branched to from the function entry when len < 8.
 * Steps: return via .Lzero if len is 0; move one byte if dst is odd;
 * move byte pairs while len has bit 1 or bit 2 set; move a final odd
 * byte; finish at .Ldone.
 * The length bookkeeping uses sub, tst and teq only, none of which
 * alter C, so the adcs carry chain is not broken.
 * NOTE(review): load1b/load2b/put_byte_N are macros defined outside
 * this file; their exact behaviour should be confirmed there.
 */
8adbb371 NP |
57 | .Lless8: teq len, #0 @ check for zero count
58 | beq .Lzero | |
1da177e4 LT |
59 | |
60 | /* we must have at least one byte. */ | |
61 | tst dst, #1 @ dst 16-bit aligned | |
8adbb371 | 62 | beq .Lless8_aligned
1da177e4 LT |
63 | |
64 | /* Align dst */ | |
65 | load1b ip | |
66 | sub len, len, #1 | |
67 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
68 | strb ip, [dst], #1 | |
/* No byte pair left after the alignment byte: go straight to the odd-byte check. */
69 | tst len, #6 | |
8adbb371 | 70 | beq .Lless8_byteonly
1da177e4 LT |
71 | |
/* Pair loop: two bytes per pass, entered at .Lless8_aligned for the test. */
72 | 1: load2b r8, ip | |
73 | sub len, len, #2 | |
74 | adcs sum, sum, r8, put_byte_0 | |
75 | strb r8, [dst], #1 | |
76 | adcs sum, sum, ip, put_byte_1 | |
77 | strb ip, [dst], #1 | |
8adbb371 NP |
78 | .Lless8_aligned:
/* len < 8 here, so (len & 6) != 0 means at least two bytes remain. */
79 | tst len, #6 | |
1da177e4 | 80 | bne 1b
8adbb371 | 81 | .Lless8_byteonly:
/* At most one byte remains; bit 0 of len says whether it exists. */
1da177e4 | 82 | tst len, #1
8adbb371 | 83 | beq .Ldone
1da177e4 LT |
84 | load1b r8 |
85 | adcs sum, sum, r8, put_byte_0 @ update checksum | |
86 | strb r8, [dst], #1 | |
8adbb371 | 87 | b .Ldone
1da177e4 LT |
88 | |
/*
 * Function entry. Chooses a strategy from len and the alignment of
 * dst and src:
 *   - len < 8                -> .Lless8
 *   - dst not 32-bit aligned -> call .Ldst_unaligned first
 *   - src not 32-bit aligned -> .Lsrc_not_aligned
 *   - otherwise              -> the word-copy code below
 * Data is copied from src to dst while being added into sum with a
 * chain of adcs instructions; the carry is folded in at .Ldone.
 * NOTE(review): FN_ENTRY, save_regs, load_regs, load1l/load2l/load4l,
 * get_byte_N, put_byte_N and push are macros that are not defined in
 * this file. Statements about them below are assumptions -- confirm
 * against their definitions.
 */
89 | FN_ENTRY | |
1da177e4 | 90 | save_regs
1da177e4 LT |
91 | |
92 | cmp len, #8 @ Ensure that we have at least | |
8adbb371 | 93 | blo .Lless8 @ 8 bytes to copy.
1da177e4 LT |
94 | |
95 | adds sum, sum, #0 @ C = 0 | |
96 | tst dst, #3 @ Test destination alignment | |
8adbb371 | 97 | blne .Ldst_unaligned @ align destination, return here
1da177e4 LT |
98 | |
99 | /* | |
100 | * Ok, the dst pointer is now 32bit aligned, and we know | |
101 | * that we must have more than 4 bytes to copy. Note | |
102 | * that C contains the carry from the dst alignment above. | |
103 | */ | |
104 | ||
105 | tst src, #3 @ Test source alignment | |
8adbb371 | 106 | bne .Lsrc_not_aligned
1da177e4 LT |
107 | |
108 | /* Routine for src & dst aligned */ | |
109 | ||
/* ip = len with its low four bits cleared: byte count for the 16-byte loop. */
110 | bics ip, len, #15 | |
111 | beq 2f | |
112 | ||
/*
 * Four words per pass. The counter is stepped with sub + teq rather
 * than subs so that C from the last adcs carries into the next pass.
 */
113 | 1: load4l r4, r5, r6, r7 | |
114 | stmia dst!, {r4, r5, r6, r7} | |
115 | adcs sum, sum, r4 | |
116 | adcs sum, sum, r5 | |
117 | adcs sum, sum, r6 | |
118 | adcs sum, sum, r7 | |
119 | sub ip, ip, #16 | |
120 | teq ip, #0 | |
121 | bne 1b | |
122 | ||
/* ip = len & 12: an 8-byte chunk and/or a 4-byte chunk is still to be copied. */
123 | 2: ands ip, len, #12 | |
124 | beq 4f | |
125 | tst ip, #8 | |
126 | beq 3f | |
127 | load2l r4, r5 | |
128 | stmia dst!, {r4, r5} | |
129 | adcs sum, sum, r4 | |
130 | adcs sum, sum, r5 | |
131 | tst ip, #4 | |
132 | beq 4f | |
133 | ||
134 | 3: load1l r4 | |
135 | str r4, [dst], #4 | |
136 | adcs sum, sum, r4 | |
137 | ||
/*
 * len = len & 3: number of trailing bytes. If there are any, one more
 * word is fetched into r4 (presumably load1l reads a whole word from
 * src -- TODO confirm) and taken apart with the get_byte_N operands.
 * With only one byte left, r5 = byte 0 goes directly to .Lexit.
 */
138 | 4: ands len, len, #3 | |
8adbb371 | 139 | beq .Ldone
1da177e4 LT |
140 | load1l r4 |
141 | tst len, #2 | |
142 | mov r5, r4, get_byte_0 | |
8adbb371 | 143 | beq .Lexit
1da177e4 LT |
/* Two or three bytes left: add and store the first two, leave byte 2 in r5. */
144 | adcs sum, sum, r4, push #16 |
145 | strb r5, [dst], #1 | |
146 | mov r5, r4, get_byte_1 | |
147 | strb r5, [dst], #1 | |
148 | mov r5, r4, get_byte_2 | |
/*
 * Common tail, also used by the unaligned-source paths. If bit 0 of
 * len is set, r5 holds one last byte: store it and add it to sum.
 * All three instructions are conditional on ne, so nothing (C
 * included) changes when the bit is clear.
 */
8adbb371 | 149 | .Lexit: tst len, #1
1da177e4 LT |
150 | strneb r5, [dst], #1 |
151 | andne r5, r5, #255 | |
152 | adcnes sum, sum, r5, put_byte_0 | |
153 | ||
154 | /* | |
155 | * If the dst pointer was not 16-bit aligned, we | |
156 | * need to rotate the checksum here to get around | |
157 | * the inefficient byte manipulations in the | |
158 | * architecture independent code. | |
159 | */ | |
/*
 * r0 = sum + C folds the outstanding carry into the result. The word
 * at [sp, #0] is annotated as dst -- presumably the original dst as
 * stored by save_regs (TODO confirm). sum is reused as scratch for it.
 */
8adbb371 | 160 | .Ldone: adc r0, sum, #0
1da177e4 LT |
161 | ldr sum, [sp, #0] @ dst |
162 | tst sum, #1 | |
163 | movne r0, r0, ror #8 | |
90303b10 | 164 | load_regs
1da177e4 | 165 | |
/*
 * dst is 32-bit aligned but src is not (reached via bne after
 * tst src, #3, so src & 3 is 1, 2 or 3). src is rounded down to a word
 * boundary and whole words are read from there. Each output word is
 * built from the remainder of one input word and the start of the
 * next, using the pull/push shift operands. r4 always carries the
 * not-yet-written remainder of the previous input word.
 * Offsets 2 and 3 branch to .Lsrc2_aligned / .Lsrc3_aligned; the code
 * after those branches handles offset 1 (pull #8 / push #24).
 * NOTE(review): pull, push, load1l/load2l/load4l and get_byte_N are
 * macros defined outside this file. pull/push are presumably
 * endian-dependent shift directions -- confirm against their
 * definitions.
 */
8adbb371 | 166 | .Lsrc_not_aligned:
1da177e4 LT |
167 | adc sum, sum, #0 @ include C from dst alignment |
168 | and ip, src, #3 | |
169 | bic src, src, #3 | |
170 | load1l r5 | |
171 | cmp ip, #2 | |
8adbb371 NP |
172 | beq .Lsrc2_aligned |
173 | bhi .Lsrc3_aligned | |
1da177e4 LT |
/* Offset 1: cmp ip, #2 borrowed, so C is already clear for the adcs chain. */
174 | mov r4, r5, pull #8 @ C = 0 |
175 | bics ip, len, #15 | |
176 | beq 2f | |
/*
 * 16 bytes per pass: load four new words, merge each with the
 * remainder before it, store, then checksum the four output words.
 * What is left of r8 becomes the remainder (r4) for the next pass.
 * sub + teq keeps C intact across the loop test.
 */
177 | 1: load4l r5, r6, r7, r8 | |
178 | orr r4, r4, r5, push #24 | |
179 | mov r5, r5, pull #8 | |
180 | orr r5, r5, r6, push #24 | |
181 | mov r6, r6, pull #8 | |
182 | orr r6, r6, r7, push #24 | |
183 | mov r7, r7, pull #8 | |
184 | orr r7, r7, r8, push #24 | |
185 | stmia dst!, {r4, r5, r6, r7} | |
186 | adcs sum, sum, r4 | |
187 | adcs sum, sum, r5 | |
188 | adcs sum, sum, r6 | |
189 | adcs sum, sum, r7 | |
190 | mov r4, r8, pull #8 | |
191 | sub ip, ip, #16 | |
192 | teq ip, #0 | |
193 | bne 1b | |
/* ip = len & 12: an 8-byte chunk and/or a 4-byte chunk, merged the same way. */
194 | 2: ands ip, len, #12 | |
195 | beq 4f | |
196 | tst ip, #8 | |
197 | beq 3f | |
198 | load2l r5, r6 | |
199 | orr r4, r4, r5, push #24 | |
200 | mov r5, r5, pull #8 | |
201 | orr r5, r5, r6, push #24 | |
202 | stmia dst!, {r4, r5} | |
203 | adcs sum, sum, r4 | |
204 | adcs sum, sum, r5 | |
205 | mov r4, r6, pull #8 | |
206 | tst ip, #4 | |
207 | beq 4f | |
208 | 3: load1l r5 | |
209 | orr r4, r4, r5, push #24 | |
210 | str r4, [dst], #4 | |
211 | adcs sum, sum, r4 | |
212 | mov r4, r5, pull #8 | |
/*
 * len = len & 3 trailing bytes, all taken from the remainder in r4;
 * no further load is performed on this path. The last (or only) byte
 * is left in r5 for .Lexit to store and add.
 */
213 | 4: ands len, len, #3 | |
8adbb371 | 214 | beq .Ldone
1da177e4 LT |
215 | mov r5, r4, get_byte_0 |
216 | tst len, #2 | |
8adbb371 | 217 | beq .Lexit
1da177e4 LT |
218 | adcs sum, sum, r4, push #16 |
219 | strb r5, [dst], #1 | |
220 | mov r5, r4, get_byte_1 | |
221 | strb r5, [dst], #1 | |
222 | mov r5, r4, get_byte_2 | |
8adbb371 | 223 | b .Lexit
1da177e4 | 224 | |
/*
 * Unaligned-source copy for src & 3 == 2 (reached by beq after
 * cmp ip, #2 in .Lsrc_not_aligned). On entry r5 holds the first
 * aligned word read from src. Each output word is merged from two
 * input words with pull #16 / push #16; r4 carries the unwritten
 * remainder of the previous input word.
 * NOTE(review): pull, push, load1b, load1l/load2l/load4l and
 * get_byte_N are macros defined outside this file -- confirm their
 * behaviour there.
 */
8adbb371 | 225 | .Lsrc2_aligned: mov r4, r5, pull #16
1da177e4 LT |
/*
 * Clear C before the adcs chain: the cmp ip, #2 that led here left C
 * set, and adding zero to sum cannot produce a carry.
 */
226 | adds sum, sum, #0 |
227 | bics ip, len, #15 | |
228 | beq 2f | |
/* 16 bytes per pass; sub + teq keeps C intact across the loop test. */
229 | 1: load4l r5, r6, r7, r8 | |
230 | orr r4, r4, r5, push #16 | |
231 | mov r5, r5, pull #16 | |
232 | orr r5, r5, r6, push #16 | |
233 | mov r6, r6, pull #16 | |
234 | orr r6, r6, r7, push #16 | |
235 | mov r7, r7, pull #16 | |
236 | orr r7, r7, r8, push #16 | |
237 | stmia dst!, {r4, r5, r6, r7} | |
238 | adcs sum, sum, r4 | |
239 | adcs sum, sum, r5 | |
240 | adcs sum, sum, r6 | |
241 | adcs sum, sum, r7 | |
242 | mov r4, r8, pull #16 | |
243 | sub ip, ip, #16 | |
244 | teq ip, #0 | |
245 | bne 1b | |
/* ip = len & 12: an 8-byte chunk and/or a 4-byte chunk, merged the same way. */
246 | 2: ands ip, len, #12 | |
247 | beq 4f | |
248 | tst ip, #8 | |
249 | beq 3f | |
250 | load2l r5, r6 | |
251 | orr r4, r4, r5, push #16 | |
252 | mov r5, r5, pull #16 | |
253 | orr r5, r5, r6, push #16 | |
254 | stmia dst!, {r4, r5} | |
255 | adcs sum, sum, r4 | |
256 | adcs sum, sum, r5 | |
257 | mov r4, r6, pull #16 | |
258 | tst ip, #4 | |
259 | beq 4f | |
260 | 3: load1l r5 | |
261 | orr r4, r4, r5, push #16 | |
262 | str r4, [dst], #4 | |
263 | adcs sum, sum, r4 | |
264 | mov r4, r5, pull #16 | |
/*
 * len = len & 3 trailing bytes. A single byte goes to .Lexit in r5.
 * For two or three, r4 is added to sum and two bytes are stored from
 * it; a third byte, if bit 0 of len is set, is fetched with load1b and
 * finished at .Lexit.
 */
265 | 4: ands len, len, #3 | |
8adbb371 | 266 | beq .Ldone
1da177e4 LT |
267 | mov r5, r4, get_byte_0 |
268 | tst len, #2 | |
8adbb371 | 269 | beq .Lexit
1da177e4 LT |
270 | adcs sum, sum, r4 |
271 | strb r5, [dst], #1 | |
272 | mov r5, r4, get_byte_1 | |
273 | strb r5, [dst], #1 | |
274 | tst len, #1 | |
8adbb371 | 275 | beq .Ldone
1da177e4 | 276 | load1b r5
8adbb371 | 277 | b .Lexit
1da177e4 | 278 | |
/*
 * Unaligned-source copy for src & 3 == 3 (reached by bhi after
 * cmp ip, #2 in .Lsrc_not_aligned). On entry r5 holds the first
 * aligned word read from src. Each output word is merged from two
 * input words with pull #24 / push #8; r4 carries the unwritten
 * remainder of the previous input word.
 * NOTE(review): pull, push, load1l/load2l/load4l, get_byte_N and
 * FN_EXIT are macros defined outside this file -- confirm their
 * behaviour there.
 */
8adbb371 | 279 | .Lsrc3_aligned: mov r4, r5, pull #24
1da177e4 LT |
/*
 * Clear C before the adcs chain: the cmp ip, #2 that led here left C
 * set, and adding zero to sum cannot produce a carry.
 */
280 | adds sum, sum, #0 |
281 | bics ip, len, #15 | |
282 | beq 2f | |
/* 16 bytes per pass; sub + teq keeps C intact across the loop test. */
283 | 1: load4l r5, r6, r7, r8 | |
284 | orr r4, r4, r5, push #8 | |
285 | mov r5, r5, pull #24 | |
286 | orr r5, r5, r6, push #8 | |
287 | mov r6, r6, pull #24 | |
288 | orr r6, r6, r7, push #8 | |
289 | mov r7, r7, pull #24 | |
290 | orr r7, r7, r8, push #8 | |
291 | stmia dst!, {r4, r5, r6, r7} | |
292 | adcs sum, sum, r4 | |
293 | adcs sum, sum, r5 | |
294 | adcs sum, sum, r6 | |
295 | adcs sum, sum, r7 | |
296 | mov r4, r8, pull #24 | |
297 | sub ip, ip, #16 | |
298 | teq ip, #0 | |
299 | bne 1b | |
/* ip = len & 12: an 8-byte chunk and/or a 4-byte chunk, merged the same way. */
300 | 2: ands ip, len, #12 | |
301 | beq 4f | |
302 | tst ip, #8 | |
303 | beq 3f | |
304 | load2l r5, r6 | |
305 | orr r4, r4, r5, push #8 | |
306 | mov r5, r5, pull #24 | |
307 | orr r5, r5, r6, push #8 | |
308 | stmia dst!, {r4, r5} | |
309 | adcs sum, sum, r4 | |
310 | adcs sum, sum, r5 | |
311 | mov r4, r6, pull #24 | |
312 | tst ip, #4 | |
313 | beq 4f | |
314 | 3: load1l r5 | |
315 | orr r4, r4, r5, push #8 | |
316 | str r4, [dst], #4 | |
317 | adcs sum, sum, r4 | |
318 | mov r4, r5, pull #24 | |
/*
 * len = len & 3 trailing bytes. A single byte goes to .Lexit in r5.
 * For two or three, one byte is stored from r4 and r4 is added to
 * sum; a fresh word is then loaded for the second byte (added with
 * push #24), and its next byte is left in r5 for .Lexit.
 */
319 | 4: ands len, len, #3 | |
8adbb371 | 320 | beq .Ldone
1da177e4 LT |
321 | mov r5, r4, get_byte_0 |
322 | tst len, #2 | |
8adbb371 | 323 | beq .Lexit
1da177e4 LT |
324 | strb r5, [dst], #1 |
325 | adcs sum, sum, r4 | |
326 | load1l r4 | |
327 | mov r5, r4, get_byte_0 | |
328 | strb r5, [dst], #1 | |
329 | adcs sum, sum, r4, push #24 | |
330 | mov r5, r4, get_byte_1 | |
8adbb371 | 331 | b .Lexit
/* FN_EXIT presumably closes the function opened by FN_ENTRY -- TODO confirm. */
93ed3970 | 332 | FN_EXIT