Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines | |
3 | * | |
2f82af08 | 4 | * Author: Nicolas Pitre <nico@fluxnic.net> |
1da177e4 LT |
5 | * - contributed to gcc-3.4 on Sep 30, 2003 |
6 | * - adapted for the Linux kernel on Oct 2, 2003 | |
7 | */ | |
8 | ||
9 | /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. | |
10 | ||
11 | This file is free software; you can redistribute it and/or modify it | |
12 | under the terms of the GNU General Public License as published by the | |
13 | Free Software Foundation; either version 2, or (at your option) any | |
14 | later version. | |
15 | ||
16 | In addition to the permissions in the GNU General Public License, the | |
17 | Free Software Foundation gives you unlimited permission to link the | |
18 | compiled version of this file into combinations with other programs, | |
19 | and to distribute those combinations without any restriction coming | |
20 | from the use of this file. (The General Public License restrictions | |
21 | do apply in other respects; for example, they cover modification of | |
22 | the file, and distribution when not linked into a combine | |
23 | executable.) | |
24 | ||
25 | This file is distributed in the hope that it will be useful, but | |
26 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
27 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
28 | General Public License for more details. | |
29 | ||
30 | You should have received a copy of the GNU General Public License | |
31 | along with this program; see the file COPYING. If not, write to | |
32 | the Free Software Foundation, 59 Temple Place - Suite 330, | |
33 | Boston, MA 02111-1307, USA. */ | |
34 | ||
35 | ||
36 | #include <linux/linkage.h> | |
37 | #include <asm/assembler.h> | |
81479c24 | 38 | #include <asm/unwind.h> |
4dd1837d | 39 | #include <asm/export.h> |
1da177e4 LT |
40 | |
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Shift-and-subtract core of the unsigned division routines.  On exit
 * the quotient is in "result".  The "dividend", "divisor" and "curbit"
 * registers are all modified along the way.
 *
 * The entry points in this file only expand this macro after they have
 * dealt with a zero divisor, dividend <= divisor and power-of-two
 * divisors on separate paths.
 */
41 | .macro ARM_DIV_BODY dividend, divisor, result, curbit | |
42 | ||
43 | #if __LINUX_ARM_ARCH__ >= 5 | |
44 | ||
/*
 * With clz available: the difference between the two leading-zero counts
 * is the shift that lines the divisor's top set bit up with the
 * dividend's.  The divisor and a single 1 bit (curbit) are both shifted
 * left by that amount, then the quotient accumulator is cleared.
 */
45 | clz \curbit, \divisor | |
46 | clz \result, \dividend | |
47 | sub \result, \curbit, \result | |
48 | mov \curbit, #1 | |
49 | mov \divisor, \divisor, lsl \result | |
50 | mov \curbit, \curbit, lsl \result | |
51 | mov \result, #0 | |
52 | ||
53 | #else | |
54 | ||
55 | @ Initially shift the divisor left 3 bits if possible, | |
56 | @ set curbit accordingly. This allows for curbit to be located | |
57 | @ at the left end of each 4 bit nibbles in the division loop | |
58 | @ to save one loop in most cases. | |
59 | tst \divisor, #0xe0000000 | |
60 | moveq \divisor, \divisor, lsl #3 | |
61 | moveq \curbit, #8 | |
62 | movne \curbit, #1 | |
63 | ||
64 | @ Unless the divisor is very big, shift it up in multiples of | |
65 | @ four bits, since this is the amount of unwinding in the main | |
66 | @ division loop. Continue shifting until the divisor is | |
67 | @ larger than the dividend. | |
68 | 1: cmp \divisor, #0x10000000 | |
69 | cmplo \divisor, \dividend | |
70 | movlo \divisor, \divisor, lsl #4 | |
71 | movlo \curbit, \curbit, lsl #4 | |
72 | blo 1b | |
73 | ||
74 | @ For very big divisors, we must shift it a bit at a time, or | |
75 | @ we will be in danger of overflowing. | |
76 | 1: cmp \divisor, #0x80000000 | |
77 | cmplo \divisor, \dividend | |
78 | movlo \divisor, \divisor, lsl #1 | |
79 | movlo \curbit, \curbit, lsl #1 | |
80 | blo 1b | |
81 | ||
82 | mov \result, #0 | |
83 | ||
84 | #endif | |
85 | ||
/*
 * Each pass tries the divisor at four adjacent bit positions (shifted
 * right by 0, 1, 2 and 3).  Every cmp sets the flags that predicate the
 * subhs/orrhs pair following it: when the subtraction fits, it is
 * applied and the matching curbit position is set in the quotient.
 * The loop stops as soon as the dividend has been reduced to zero, or
 * when shifting curbit right by 4 leaves it zero (the flag-setting
 * movnes provides the condition for the final bne).
 */
86 | @ Division loop | |
87 | 1: cmp \dividend, \divisor | |
88 | subhs \dividend, \dividend, \divisor | |
89 | orrhs \result, \result, \curbit | |
90 | cmp \dividend, \divisor, lsr #1 | |
91 | subhs \dividend, \dividend, \divisor, lsr #1 | |
92 | orrhs \result, \result, \curbit, lsr #1 | |
93 | cmp \dividend, \divisor, lsr #2 | |
94 | subhs \dividend, \dividend, \divisor, lsr #2 | |
95 | orrhs \result, \result, \curbit, lsr #2 | |
96 | cmp \dividend, \divisor, lsr #3 | |
97 | subhs \dividend, \dividend, \divisor, lsr #3 | |
98 | orrhs \result, \result, \curbit, lsr #3 | |
99 | cmp \dividend, #0 @ Early termination? | |
100 | movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? | |
101 | movne \divisor, \divisor, lsr #4 | |
102 | bne 1b | |
103 | ||
104 | .endm | |
105 | ||
106 | ||
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute in "order" the right-shift count equivalent to dividing by
 * "divisor".  The entry points in this file expand it only after
 * checking that the divisor is a power of two, in which case the result
 * is log2(divisor).
 *
 * The ARMv5+ path leaves "divisor" untouched; the fallback path shifts
 * it down while searching, so it must be treated as clobbered.
 */
107 | .macro ARM_DIV2_ORDER divisor, order | |
108 | ||
109 | #if __LINUX_ARM_ARCH__ >= 5 | |
110 | ||
/* order = 31 - clz(divisor), i.e. the index of the highest set bit. */
111 | clz \order, \divisor | |
112 | rsb \order, \order, #31 | |
113 | ||
114 | #else | |
115 | ||
/*
 * Binary search without clz: whenever the divisor is at least 2^16,
 * 2^8 or 2^4, shift it down by that many bits and add the same amount
 * to the order.
 */
116 | cmp \divisor, #(1 << 16) | |
117 | movhs \divisor, \divisor, lsr #16 | |
118 | movhs \order, #16 | |
119 | movlo \order, #0 | |
120 | ||
121 | cmp \divisor, #(1 << 8) | |
122 | movhs \divisor, \divisor, lsr #8 | |
123 | addhs \order, \order, #8 | |
124 | ||
125 | cmp \divisor, #(1 << 4) | |
126 | movhs \divisor, \divisor, lsr #4 | |
127 | addhs \order, \order, #4 | |
128 | ||
/*
 * The divisor is now below 16.  For a power of two that leaves 1, 2, 4
 * or 8: a value above 4 adds 3, otherwise divisor >> 1 (0, 1 or 2) is
 * added.
 */
129 | cmp \divisor, #(1 << 2) | |
130 | addhi \order, \order, #3 | |
131 | addls \order, \order, \divisor, lsr #1 | |
132 | ||
133 | #endif | |
134 | ||
135 | .endm | |
136 | ||
137 | ||
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Shift-and-subtract core of the unsigned modulo routines.  The divisor
 * is first shifted left towards the dividend, with "order" recording how
 * many bit positions it was moved; it is then walked back down one
 * position at a time, subtracting wherever it still fits.  On exit the
 * remainder is in "dividend".
 *
 * "divisor" and "order" are modified.  "spare" is only written by the
 * ARMv5+ path.
 */
138 | .macro ARM_MOD_BODY dividend, divisor, order, spare | |
139 | ||
140 | #if __LINUX_ARM_ARCH__ >= 5 | |
141 | ||
/*
 * order = clz(divisor) - clz(dividend): the shift that lines the
 * divisor's top set bit up with the dividend's.
 */
142 | clz \order, \divisor | |
143 | clz \spare, \dividend | |
144 | sub \order, \order, \spare | |
145 | mov \divisor, \divisor, lsl \order | |
146 | ||
147 | #else | |
148 | ||
149 | mov \order, #0 | |
150 | ||
151 | @ Unless the divisor is very big, shift it up in multiples of | |
152 | @ four bits, since this is the amount of unwinding in the main | |
153 | @ division loop. Continue shifting until the divisor is | |
154 | @ larger than the dividend. | |
155 | 1: cmp \divisor, #0x10000000 | |
156 | cmplo \divisor, \dividend | |
157 | movlo \divisor, \divisor, lsl #4 | |
158 | addlo \order, \order, #4 | |
159 | blo 1b | |
160 | ||
161 | @ For very big divisors, we must shift it a bit at a time, or | |
162 | @ we will be in danger of overflowing. | |
163 | 1: cmp \divisor, #0x80000000 | |
164 | cmplo \divisor, \dividend | |
165 | movlo \divisor, \divisor, lsl #1 | |
166 | addlo \order, \order, #1 | |
167 | blo 1b | |
168 | ||
169 | #endif | |
170 | ||
82350ab1 | 171 | @ Perform all needed subtractions to keep only the remainder. |
1da177e4 LT |
172 | @ Do comparisons in batch of 4 first. |
173 | subs \order, \order, #3 @ yes, 3 is intended here | |
174 | blt 2f | |
175 | ||
/*
 * One full batch: four compare/subtract steps with the divisor shifted
 * right by 0..3, after which the divisor itself moves down four bits.
 * The batch is repeated while the dividend is still non-zero and taking
 * another 4 off the order does not make it negative.
 */
176 | 1: cmp \dividend, \divisor | |
177 | subhs \dividend, \dividend, \divisor | |
178 | cmp \dividend, \divisor, lsr #1 | |
179 | subhs \dividend, \dividend, \divisor, lsr #1 | |
180 | cmp \dividend, \divisor, lsr #2 | |
181 | subhs \dividend, \dividend, \divisor, lsr #2 | |
182 | cmp \dividend, \divisor, lsr #3 | |
183 | subhs \dividend, \dividend, \divisor, lsr #3 | |
184 | cmp \dividend, #1 | |
185 | mov \divisor, \divisor, lsr #4 | |
186 | subges \order, \order, #4 | |
187 | bge 1b | |
188 | ||
/*
 * Done if the low two bits of the order are clear (no partial batch is
 * left) or if the dividend has already reached zero.
 */
189 | tst \order, #3 | |
190 | teqne \dividend, #0 | |
191 | beq 5f | |
192 | ||
/*
 * cmn compares the order against -2: below it enters at 4: (one step),
 * equal enters at 3: (two steps), anything else falls through and runs
 * all three steps.
 */
82350ab1 | 193 | @ Either 1, 2 or 3 comparison/subtractions are left. |
1da177e4 LT |
194 | 2: cmn \order, #2 |
195 | blt 4f | |
196 | beq 3f | |
197 | cmp \dividend, \divisor | |
198 | subhs \dividend, \dividend, \divisor | |
199 | mov \divisor, \divisor, lsr #1 | |
200 | 3: cmp \dividend, \divisor | |
201 | subhs \dividend, \dividend, \divisor | |
202 | mov \divisor, \divisor, lsr #1 | |
203 | 4: cmp \dividend, \divisor | |
204 | subhs \dividend, \dividend, \divisor | |
205 | 5: | |
206 | .endm | |
207 | ||
208 | ||
42f25bdd NP |
209 | #ifdef CONFIG_ARM_PATCH_IDIV |
210 | .align 3 | |
211 | #endif | |
212 | ||
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Uses r1, r2, r3 and the condition flags as scratch.
 *
 * A zero divisor branches to Ldiv0.  Division by 1, dividend <= divisor
 * and power-of-two divisors are answered without entering ARM_DIV_BODY.
 */
1da177e4 | 213 | ENTRY(__udivsi3)
ba95e4e4 | 214 | ENTRY(__aeabi_uidiv)
81479c24 | 215 | UNWIND(.fnstart)
1da177e4 LT |
216 | |
/*
 * r2 = divisor - 1.  A zero result means the divisor is 1, so r0 is
 * already the quotient; a borrow (carry clear) means the divisor was 0.
 * r2 is kept because "divisor & (divisor - 1)" is the power-of-two test.
 */
217 | subs r2, r1, #1 | |
6ebbf2ce | 218 | reteq lr |
1da177e4 LT |
219 | bcc Ldiv0 |
220 | cmp r0, r1 | |
221 | bls 11f | |
222 | tst r1, r2 | |
223 | beq 12f | |
224 | ||
225 | ARM_DIV_BODY r0, r1, r2, r3 | |
226 | ||
227 | mov r0, r2 | |
6ebbf2ce | 228 | ret lr |
1da177e4 LT |
229 | |
/*
 * dividend <= divisor, flags still from "cmp r0, r1": the quotient is 1
 * when they are equal and 0 otherwise.
 */
230 | 11: moveq r0, #1 | |
231 | movne r0, #0 | |
6ebbf2ce | 232 | ret lr |
1da177e4 LT |
233 | |
/* Power-of-two divisor: shift the dividend right by log2(divisor). */
234 | 12: ARM_DIV2_ORDER r1, r2 | |
235 | ||
236 | mov r0, r0, lsr r2 | |
6ebbf2ce | 237 | ret lr |
1da177e4 | 238 | |
81479c24 | 239 | UNWIND(.fnend) |
93ed3970 CM |
240 | ENDPROC(__udivsi3) |
241 | ENDPROC(__aeabi_uidiv) | |
4dd1837d AV |
242 | EXPORT_SYMBOL(__udivsi3) |
243 | EXPORT_SYMBOL(__aeabi_uidiv) | |
1da177e4 LT |
244 | |
/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder
 * Uses r1, r2, r3 and the condition flags as scratch.
 *
 * A zero divisor branches to Ldiv0.  The conditional chain below settles
 * every easy case before ARM_MOD_BODY is reached:
 *   - divisor == 1 or dividend == divisor: remainder 0
 *   - dividend <  divisor: remainder is the dividend, r0 left as is
 *   - power-of-two divisor: remainder = dividend & (divisor - 1)
 * Each instruction is predicated on flags left by the one before it, so
 * the order of the chain must not be changed.
 */
245 | ENTRY(__umodsi3) | |
81479c24 | 246 | UNWIND(.fnstart) |
1da177e4 LT |
247 | |
248 | subs r2, r1, #1 @ compare divisor with 1 | |
249 | bcc Ldiv0 | |
250 | cmpne r0, r1 @ compare dividend with divisor | |
251 | moveq r0, #0 | |
252 | tsthi r1, r2 @ see if divisor is power of 2 | |
253 | andeq r0, r0, r2 | |
6ebbf2ce | 254 | retls lr |
1da177e4 LT |
255 | |
256 | ARM_MOD_BODY r0, r1, r2, r3 | |
257 | ||
6ebbf2ce | 258 | ret lr |
1da177e4 | 259 | |
81479c24 | 260 | UNWIND(.fnend) |
93ed3970 | 261 | ENDPROC(__umodsi3) |
4dd1837d | 262 | EXPORT_SYMBOL(__umodsi3) |
1da177e4 | 263 | |
42f25bdd NP |
264 | #ifdef CONFIG_ARM_PATCH_IDIV |
265 | .align 3 | |
266 | #endif | |
267 | ||
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Uses r1, r2, r3, ip and the condition flags as scratch.
 *
 * ip = r0 ^ r1 keeps the sign of the result in bit 31.  Both operands
 * are then made positive (r1 in place, |dividend| in r3), the unsigned
 * algorithm runs, and the quotient is negated at the end when ip is
 * negative.  A zero divisor branches to Ldiv0.
 */
1da177e4 | 268 | ENTRY(__divsi3)
ba95e4e4 | 269 | ENTRY(__aeabi_idiv)
81479c24 | 270 | UNWIND(.fnstart)
1da177e4 LT |
271 | |
272 | cmp r1, #0 | |
273 | eor ip, r0, r1 @ save the sign of the result. | |
274 | beq Ldiv0 | |
275 | rsbmi r1, r1, #0 @ loops below use unsigned. | |
276 | subs r2, r1, #1 @ division by 1 or -1 ? | |
277 | beq 10f | |
278 | movs r3, r0 | |
279 | rsbmi r3, r0, #0 @ positive dividend value | |
280 | cmp r3, r1 | |
281 | bls 11f | |
282 | tst r1, r2 @ divisor is power of 2 ? | |
283 | beq 12f | |
284 | ||
285 | ARM_DIV_BODY r3, r1, r0, r2 | |
286 | ||
287 | cmp ip, #0 | |
288 | rsbmi r0, r0, #0 | |
6ebbf2ce | 289 | ret lr |
1da177e4 LT |
290 | |
/*
 * |divisor| == 1.  ip ^ r0 gives back the sign bit of the original
 * divisor, so r0 is negated only when the divisor was -1.
 */
291 | 10: teq ip, r0 @ same sign ? | |
292 | rsbmi r0, r0, #0 | |
6ebbf2ce | 293 | ret lr |
1da177e4 LT |
294 | |
/*
 * |dividend| <= |divisor|, flags still from "cmp r3, r1": the quotient
 * is 0 when strictly lower.  When equal, "ip asr #31" yields 0 or -1 and
 * OR-ing in bit 0 turns that into +1 or -1.
 */
295 | 11: movlo r0, #0 | |
296 | moveq r0, ip, asr #31 | |
297 | orreq r0, r0, #1 | |
6ebbf2ce | 298 | ret lr |
1da177e4 LT |
299 | |
/*
 * Power-of-two divisor: shift |dividend| right by log2(|divisor|), then
 * apply the sign saved in ip.
 */
300 | 12: ARM_DIV2_ORDER r1, r2 | |
301 | ||
302 | cmp ip, #0 | |
303 | mov r0, r3, lsr r2 | |
304 | rsbmi r0, r0, #0 | |
6ebbf2ce | 305 | ret lr |
1da177e4 | 306 | |
81479c24 | 307 | UNWIND(.fnend) |
93ed3970 CM |
308 | ENDPROC(__divsi3) |
309 | ENDPROC(__aeabi_idiv) | |
4dd1837d AV |
310 | EXPORT_SYMBOL(__divsi3) |
311 | EXPORT_SYMBOL(__aeabi_idiv) | |
1da177e4 LT |
312 | |
/*
 * __modsi3: signed 32-bit modulo.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder, carrying the sign of the dividend
 * Uses r1, r2, r3, ip and the condition flags as scratch.
 *
 * The original dividend is kept in ip, both operands are made positive,
 * and the unsigned remainder is computed.  The result is negated at 10:
 * when the original dividend was negative.  A zero divisor branches to
 * Ldiv0.
 */
313 | ENTRY(__modsi3) | |
81479c24 | 314 | UNWIND(.fnstart) |
1da177e4 LT |
315 | |
316 | cmp r1, #0 | |
317 | beq Ldiv0 | |
318 | rsbmi r1, r1, #0 @ loops below use unsigned. | |
319 | movs ip, r0 @ preserve sign of dividend | |
320 | rsbmi r0, r0, #0 @ if negative make positive | |
/*
 * Conditional chain for the easy cases; each instruction is predicated
 * on the flags left by the previous one, so the order must be kept.
 * A divisor of 1 or operands of equal magnitude give 0, a dividend
 * smaller than the divisor is left as is, and a power-of-two divisor is
 * reduced by masking with (divisor - 1).  All of these skip
 * ARM_MOD_BODY via the bls.
 */
321 | subs r2, r1, #1 @ compare divisor with 1 | |
322 | cmpne r0, r1 @ compare dividend with divisor | |
323 | moveq r0, #0 | |
324 | tsthi r1, r2 @ see if divisor is power of 2 | |
325 | andeq r0, r0, r2 | |
326 | bls 10f | |
327 | ||
328 | ARM_MOD_BODY r0, r1, r2, r3 | |
329 | ||
330 | 10: cmp ip, #0 | |
331 | rsbmi r0, r0, #0 | |
6ebbf2ce | 332 | ret lr |
1da177e4 | 333 | |
81479c24 | 334 | UNWIND(.fnend) |
93ed3970 | 335 | ENDPROC(__modsi3) |
4dd1837d | 336 | EXPORT_SYMBOL(__modsi3) |
93ed3970 | 337 | |
ba95e4e4 NP |
338 | #ifdef CONFIG_AEABI |
339 | ||
/*
 * __aeabi_uidivmod: unsigned 32-bit division returning both results.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Uses r2, r3 as scratch; ip and lr are saved and restored.
 *
 * The operands are pushed before calling __aeabi_uidiv and popped back
 * into different registers (dividend into r1, divisor into r2), so that
 * the remainder can be formed as dividend - quotient * divisor.
 */
340 | ENTRY(__aeabi_uidivmod) | |
81479c24 LA |
341 | UNWIND(.fnstart) |
342 | UNWIND(.save {r0, r1, ip, lr} ) | |
ba95e4e4 NP |
343 | |
344 | stmfd sp!, {r0, r1, ip, lr} | |
345 | bl __aeabi_uidiv | |
346 | ldmfd sp!, {r1, r2, ip, lr} | |
347 | mul r3, r0, r2 | |
348 | sub r1, r1, r3 | |
6ebbf2ce | 349 | ret lr |
ba95e4e4 | 350 | |
81479c24 | 351 | UNWIND(.fnend) |
93ed3970 | 352 | ENDPROC(__aeabi_uidivmod) |
4dd1837d | 353 | EXPORT_SYMBOL(__aeabi_uidivmod) |
93ed3970 | 354 | |
/*
 * __aeabi_idivmod: signed 32-bit division returning both results.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Uses r2, r3 as scratch; ip and lr are saved and restored.
 *
 * The operands are pushed before calling __aeabi_idiv and popped back
 * into different registers (dividend into r1, divisor into r2), so that
 * the remainder can be formed as dividend - quotient * divisor.
 */
ba95e4e4 | 355 | ENTRY(__aeabi_idivmod)
81479c24 LA |
356 | UNWIND(.fnstart) |
357 | UNWIND(.save {r0, r1, ip, lr} ) | |
ba95e4e4 NP |
358 | stmfd sp!, {r0, r1, ip, lr} |
359 | bl __aeabi_idiv | |
360 | ldmfd sp!, {r1, r2, ip, lr} | |
361 | mul r3, r0, r2 | |
362 | sub r1, r1, r3 | |
6ebbf2ce | 363 | ret lr |
ba95e4e4 | 364 | |
81479c24 | 365 | UNWIND(.fnend) |
93ed3970 | 366 | ENDPROC(__aeabi_idivmod) |
4dd1837d | 367 | EXPORT_SYMBOL(__aeabi_idivmod) |
93ed3970 | 368 | |
ba95e4e4 | 369 | #endif |
1da177e4 LT |
370 | |
/*
 * Ldiv0: shared divide-by-zero path, branched to by the division and
 * modulo entry points in this file when the divisor is zero.
 *
 * Saves lr on the stack, calls __div0 (not defined in this part of the
 * file), then returns 0 in r0 to the original caller by loading the
 * saved lr straight into pc.
 *
 * The stack pointer moves by 8 bytes although only lr (4 bytes) is
 * stored; the unwind annotations describe that as 4 bytes of padding
 * plus the saved lr.
 * NOTE(review): the extra 4 bytes presumably keep sp 8-byte aligned
 * across the call - confirm against the ABI in use.
 */
371 | Ldiv0: | |
81479c24 LA |
372 | UNWIND(.fnstart) |
373 | UNWIND(.pad #4) | |
374 | UNWIND(.save {lr}) | |
499b2ea1 | 375 | str lr, [sp, #-8]! |
1da177e4 LT |
376 | bl __div0 |
377 | mov r0, #0 @ About as wrong as it could be. | |
499b2ea1 | 378 | ldr pc, [sp], #8 |
81479c24 LA |
379 | UNWIND(.fnend) |
380 | ENDPROC(Ldiv0) |