2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 * - contributed to gcc-3.4 on Sep 30, 2003
6 * - adapted for the Linux kernel on Oct 2, 2003
9 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
11 This file is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by the
13 Free Software Foundation; either version 2, or (at your option) any later version.
16 In addition to the permissions in the GNU General Public License, the
17 Free Software Foundation gives you unlimited permission to link the
18 compiled version of this file into combinations with other programs,
19 and to distribute those combinations without any restriction coming
20 from the use of this file. (The General Public License restrictions
21 do apply in other respects; for example, they cover modification of
22 the file, and distribution when not linked into a combine executable.)
25 This file is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 General Public License for more details.
30 You should have received a copy of the GNU General Public License
31 along with this program; see the file COPYING. If not, write to
32 the Free Software Foundation, 59 Temple Place - Suite 330,
33 Boston, MA 02111-1307, USA. */
36 #include <linux/linkage.h>
37 #include <asm/assembler.h>
38 #include <asm/unwind.h>
39 #include <asm/export.h>
@ ARM_DIV_BODY dividend, divisor, result, curbit
@
@ Shift-and-subtract core of the unsigned division: the loop at the end
@ reduces \dividend in place and ORs the matching \curbit bits into \result.
@ All four arguments are registers and all of them are written.
@ NOTE(review): this listing seems to omit some lines of the original macro
@ (no #else/#endif, loop branches or .endm are visible), so the notes below
@ only describe the instructions that are shown.
41 .macro ARM_DIV_BODY dividend, divisor, result, curbit
43 #if __LINUX_ARM_ARCH__ >= 5
@ Architecture 5 and up: use clz to work out a shift count in \result,
@ then move \divisor and \curbit left by that count in one step.
46 clz \result, \dividend
47 sub \result, \curbit, \result
49 mov \divisor, \divisor, lsl \result
50 mov \curbit, \curbit, lsl \result
@ NOTE(review): presumably the two alignment loops below are the fallback
@ for older architectures; the #else that would separate them from the clz
@ path is not visible here.
55 @ Initially shift the divisor left 3 bits if possible,
56 @ set curbit accordingly. This allows for curbit to be located
57 @ at the left end of each 4-bit nibble in the division loop
58 @ to save one loop in most cases.
@ The tst checks the top three bits of \divisor; the shift is only done
@ when all of them are clear, so no set bit is shifted out.
59 tst \divisor, #0xe0000000
60 moveq \divisor, \divisor, lsl #3
64 @ Unless the divisor is very big, shift it up in multiples of
65 @ four bits, since this is the amount of unwinding in the main
66 @ division loop. Continue shifting until the divisor is
67 @ larger than the dividend.
@ The conditional chain only shifts while \divisor is below both the
@ limit and \dividend (unsigned compares).
68 1: cmp \divisor, #0x10000000
69 cmplo \divisor, \dividend
70 movlo \divisor, \divisor, lsl #4
71 movlo \curbit, \curbit, lsl #4
74 @ For very big divisors, we must shift it a bit at a time, or
75 @ we will be in danger of overflowing.
76 1: cmp \divisor, #0x80000000
77 cmplo \divisor, \dividend
78 movlo \divisor, \divisor, lsl #1
79 movlo \curbit, \curbit, lsl #1
@ Division loop body: four quotient bits per pass. \divisor, \divisor >> 1,
@ >> 2 and >> 3 are tried in turn; whenever \dividend is unsigned higher or
@ same, the shifted divisor is subtracted and the equally shifted \curbit
@ is ORed into \result.
87 1: cmp \dividend, \divisor
88 subhs \dividend, \dividend, \divisor
89 orrhs \result, \result, \curbit
90 cmp \dividend, \divisor, lsr #1
91 subhs \dividend, \dividend, \divisor, lsr #1
92 orrhs \result, \result, \curbit, lsr #1
93 cmp \dividend, \divisor, lsr #2
94 subhs \dividend, \dividend, \divisor, lsr #2
95 orrhs \result, \result, \curbit, lsr #2
96 cmp \dividend, \divisor, lsr #3
97 subhs \dividend, \dividend, \divisor, lsr #3
98 orrhs \result, \result, \curbit, lsr #3
@ movnes updates the flags from the shifted \curbit, so the movne that
@ follows only runs while \dividend is non-zero and \curbit still has a
@ bit left after moving down four places.
99 cmp \dividend, #0 @ Early termination?
100 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
101 movne \divisor, \divisor, lsr #4
@ ARM_DIV2_ORDER divisor, order
@
@ Derives a bit index in \order from \divisor; \divisor is modified by the
@ visible non-clz steps.
@ NOTE(review): presumably intended for power-of-two divisors, giving
@ log2(\divisor) in \order -- confirm against the callers. Several original
@ lines (clz, #else/#endif, .endm, some \order updates) are not visible in
@ this listing.
107 .macro ARM_DIV2_ORDER divisor, order
109 #if __LINUX_ARM_ARCH__ >= 5
@ \order = 31 - \order; the instruction that loads \order beforehand is
@ not visible here.
112 rsb \order, \order, #31
@ Successive halving: when \divisor is at least 1 << k it is shifted
@ right by k and (for k = 8 and 4) k is added to \order.
@ NOTE(review): the \order update matching the 1 << 16 step is not shown.
116 cmp \divisor, #(1 << 16)
117 movhs \divisor, \divisor, lsr #16
121 cmp \divisor, #(1 << 8)
122 movhs \divisor, \divisor, lsr #8
123 addhs \order, \order, #8
125 cmp \divisor, #(1 << 4)
126 movhs \divisor, \divisor, lsr #4
127 addhs \order, \order, #4
@ Final step: a value above 4 adds 3; otherwise \divisor >> 1 is added,
@ which is 0, 1, 2 for \divisor values 1, 2, 4.
129 cmp \divisor, #(1 << 2)
130 addhi \order, \order, #3
131 addls \order, \order, \divisor, lsr #1
@ ARM_MOD_BODY dividend, divisor, order, spare
@
@ Shift-and-subtract core of the unsigned modulo: \dividend is reduced in
@ place by shifted copies of \divisor and no quotient is recorded. \order
@ tracks how far \divisor has been shifted up; \spare is scratch.
@ NOTE(review): this listing seems to omit some lines of the original macro
@ (no #else/#endif, branches or .endm are visible), so the notes below only
@ describe the instructions that are shown.
138 .macro ARM_MOD_BODY dividend, divisor, order, spare
140 #if __LINUX_ARM_ARCH__ >= 5
@ Architecture 5 and up: \order -= clz(\dividend), then \divisor is moved
@ left by \order in one step. The instruction that loads \order before
@ the sub is not visible here.
143 clz \spare, \dividend
144 sub \order, \order, \spare
145 mov \divisor, \divisor, lsl \order
@ NOTE(review): presumably the two alignment loops below are the fallback
@ for older architectures; the #else is not visible here.
151 @ Unless the divisor is very big, shift it up in multiples of
152 @ four bits, since this is the amount of unwinding in the main
153 @ division loop. Continue shifting until the divisor is
154 @ larger than the dividend.
@ Each 4-bit shift of \divisor is mirrored by adding 4 to \order.
155 1: cmp \divisor, #0x10000000
156 cmplo \divisor, \dividend
157 movlo \divisor, \divisor, lsl #4
158 addlo \order, \order, #4
161 @ For very big divisors, we must shift it a bit at a time, or
162 @ we will be in danger of overflowing.
163 1: cmp \divisor, #0x80000000
164 cmplo \divisor, \dividend
165 movlo \divisor, \divisor, lsl #1
166 addlo \order, \order, #1
171 @ Perform all needed subtractions to keep only the remainder.
172 @ Do comparisons in batch of 4 first.
173 subs \order, \order, #3 @ yes, 3 is intended here
@ One batch: \divisor, \divisor >> 1, >> 2 and >> 3 are each subtracted
@ from \dividend when \dividend is unsigned higher or same.
176 1: cmp \dividend, \divisor
177 subhs \dividend, \dividend, \divisor
178 cmp \dividend, \divisor, lsr #1
179 subhs \dividend, \dividend, \divisor, lsr #1
180 cmp \dividend, \divisor, lsr #2
181 subhs \dividend, \dividend, \divisor, lsr #2
182 cmp \dividend, \divisor, lsr #3
183 subhs \dividend, \dividend, \divisor, lsr #3
@ Step \divisor down four bits; the conditional subges takes another 4
@ off \order and updates the flags.
@ NOTE(review): the instruction that sets the flags for the ge test and
@ the loop branch are not visible in this listing.
185 mov \divisor, \divisor, lsr #4
186 subges \order, \order, #4
193 @ Either 1, 2 or 3 comparison/subtractions are left.
@ Labels 3 and 4 mark later starting points in this tail; the branches
@ that select among them are not visible in this listing.
197 cmp \dividend, \divisor
198 subhs \dividend, \dividend, \divisor
199 mov \divisor, \divisor, lsr #1
200 3: cmp \dividend, \divisor
201 subhs \dividend, \dividend, \divisor
202 mov \divisor, \divisor, lsr #1
203 4: cmp \dividend, \divisor
204 subhs \dividend, \dividend, \divisor
@ Unsigned divide routine, exported below as __udivsi3 and __aeabi_uidiv.
@ NOTE(review): the ENTRY lines and most of the body (early-out tests,
@ result moves, returns) are not visible in this listing; only the macro
@ invocations remain.
209 #ifdef CONFIG_ARM_PATCH_IDIV
@ Division macro with dividend = r0, divisor = r1, result = r2, curbit = r3.
225 ARM_DIV_BODY r0, r1, r2, r3
@ Label 12: order of the divisor r1 is computed into r2.
@ NOTE(review): presumably the power-of-two path -- confirm.
234 12: ARM_DIV2_ORDER r1, r2
241 ENDPROC(__aeabi_uidiv)
242 EXPORT_SYMBOL(__udivsi3)
243 EXPORT_SYMBOL(__aeabi_uidiv)
@ Unsigned modulo routine, exported below as __umodsi3.
@ NOTE(review): the ENTRY/ENDPROC lines, the branches taken after each
@ test and the returns are not visible in this listing.
@ r2 = r1 - 1. The chained tests only go on while the previous one did not
@ settle the case: cmpne runs when r1 is not 1, tsthi when r0 is above r1.
@ r1 AND r2 is zero exactly when r1 has a single bit set.
248 subs r2, r1, #1 @ compare divisor with 1
250 cmpne r0, r1 @ compare dividend with divisor
252 tsthi r1, r2 @ see if divisor is power of 2
@ General case: dividend = r0, divisor = r1, order = r2, spare = r3.
256 ARM_MOD_BODY r0, r1, r2, r3
262 EXPORT_SYMBOL(__umodsi3)
@ Signed divide routine, exported below as __divsi3 and __aeabi_idiv.
@ NOTE(review): the ENTRY lines, the branches between the tests and the
@ returns are not visible in this listing.
264 #ifdef CONFIG_ARM_PATCH_IDIV
@ ip = r0 EOR r1, so bit 31 of ip is set when the two operands differ in
@ sign. The rsbmi instructions negate a register only when the flags say
@ negative.
273 eor ip, r0, r1 @ save the sign of the result.
275 rsbmi r1, r1, #0 @ loops below use unsigned.
276 subs r2, r1, #1 @ division by 1 or -1 ?
279 rsbmi r3, r0, #0 @ positive dividend value
282 tst r1, r2 @ divisor is power of 2 ?
@ General case: dividend = r3, divisor = r1, result = r0, curbit = r2.
285 ARM_DIV_BODY r3, r1, r0, r2
291 10: teq ip, r0 @ same sign ?
@ Arithmetic shift by 31 fills r0 with the sign bit of ip (0 or -1).
296 moveq r0, ip, asr #31
@ Label 12: order of the divisor r1 is computed into r2.
300 12: ARM_DIV2_ORDER r1, r2
309 ENDPROC(__aeabi_idiv)
310 EXPORT_SYMBOL(__divsi3)
311 EXPORT_SYMBOL(__aeabi_idiv)
@ Signed modulo routine, exported below as __modsi3.
@ NOTE(review): the ENTRY/ENDPROC lines, the instruction that sets the
@ flags for the first rsbmi, the branches and the returns are not visible
@ in this listing.
318 rsbmi r1, r1, #0 @ loops below use unsigned.
@ ip keeps the original dividend (and so its sign) while r0 is made
@ non-negative for the unsigned macro.
319 movs ip, r0 @ preserve sign of dividend
320 rsbmi r0, r0, #0 @ if negative make positive
@ r2 = r1 - 1; r1 AND r2 is zero exactly when r1 has a single bit set.
321 subs r2, r1, #1 @ compare divisor with 1
322 cmpne r0, r1 @ compare dividend with divisor
324 tsthi r1, r2 @ see if divisor is power of 2
@ General case: dividend = r0, divisor = r1, order = r2, spare = r3.
328 ARM_MOD_BODY r0, r1, r2, r3
336 EXPORT_SYMBOL(__modsi3)
@ __aeabi_uidivmod: combined unsigned divide/modulo entry point.
@ NOTE(review): the call made between the push and the pop, the remainder
@ computation and the return are not visible in this listing.
340 ENTRY(__aeabi_uidivmod)
342 UNWIND(.save {r0, r1, ip, lr} )
@ Push the incoming r0 and r1 together with ip and lr.
344 stmfd sp!, {r0, r1, ip, lr}
@ Pop the same four slots into a shifted register list: the saved r0
@ comes back in r1 and the saved r1 in r2, leaving the current r0 intact.
346 ldmfd sp!, {r1, r2, ip, lr}
352 ENDPROC(__aeabi_uidivmod)
353 EXPORT_SYMBOL(__aeabi_uidivmod)
@ __aeabi_idivmod: combined signed divide/modulo entry point.
@ NOTE(review): the call made between the push and the pop, the remainder
@ computation and the return are not visible in this listing.
355 ENTRY(__aeabi_idivmod)
357 UNWIND(.save {r0, r1, ip, lr} )
@ Push the incoming r0 and r1 together with ip and lr.
358 stmfd sp!, {r0, r1, ip, lr}
@ Pop the same four slots into a shifted register list: the saved r0
@ comes back in r1 and the saved r1 in r2, leaving the current r0 intact.
360 ldmfd sp!, {r1, r2, ip, lr}
366 ENDPROC(__aeabi_idivmod)
367 EXPORT_SYMBOL(__aeabi_idivmod)
377 mov r0, #0 @ About as wrong as it could be.