arch/arm/lib/lib1funcs.S

   1 /*
   2  * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
   3  *
   4  * Author: Nicolas Pitre <nico@fluxnic.net>
   5  *   - contributed to gcc-3.4 on Sep 30, 2003
   6  *   - adapted for the Linux kernel on Oct 2, 2003
   7  */
   8
   9 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  10
  11 This file is free software; you can redistribute it and/or modify it
  12 under the terms of the GNU General Public License as published by the
  13 Free Software Foundation; either version 2, or (at your option) any
  14 later version.
  15
  16 In addition to the permissions in the GNU General Public License, the
  17 Free Software Foundation gives you unlimited permission to link the
  18 compiled version of this file into combinations with other programs,
  19 and to distribute those combinations without any restriction coming
  20 from the use of this file.  (The General Public License restrictions
  21 do apply in other respects; for example, they cover modification of
  22 the file, and distribution when not linked into a combine
  23 executable.)
  24
  25 This file is distributed in the hope that it will be useful, but
  26 WITHOUT ANY WARRANTY; without even the implied warranty of
  27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  28 General Public License for more details.
  29
  30 You should have received a copy of the GNU General Public License
  31 along with this program; see the file COPYING.  If not, write to
  32 the Free Software Foundation, 59 Temple Place - Suite 330,
  33 Boston, MA 02111-1307, USA.  */
  34
  35
  36 #include <linux/linkage.h>
  37 #include <asm/assembler.h>
  38
  39
  40 .macro ARM_DIV_BODY dividend, divisor, result, curbit
             @ Unsigned 32-bit shift-and-subtract division core.
             @   dividend : numerator on entry; reduced in place while dividing
             @   divisor  : denominator on entry; shifted in place
             @   result   : receives the quotient
             @   curbit   : scratch; the quotient bit that corresponds to the
             @              current divisor alignment
             @ The callers visible in this file expand it only after ruling out
             @ a zero divisor, dividend <= divisor and power-of-two divisors.
  41
  42 #if __LINUX_ARM_ARCH__ >= 5
  43
  44         clz     \curbit, \divisor               @ leading zeros of divisor
  45         clz     \result, \dividend              @ leading zeros of dividend
  46         sub     \result, \curbit, \result       @ shift = difference of the two
  47         mov     \curbit, #1
  48         mov     \divisor, \divisor, lsl \result @ line divisor up with dividend
  49         mov     \curbit, \curbit, lsl \result   @ curbit = 1 << shift
  50         mov     \result, #0                     @ quotient accumulates from 0
  51
  52 #else
  53
  54         @ Initially shift the divisor left 3 bits if possible,
  55         @ set curbit accordingly.  This allows for curbit to be located
  56         @ at the left end of each 4-bit nibble in the division loop
  57         @ to save one loop in most cases.
  58         tst     \divisor, #0xe0000000           @ top three bits clear?
  59         moveq   \divisor, \divisor, lsl #3      @ yes: pre-shift by 3 ...
  60         moveq   \curbit, #8                     @ ... and start curbit at bit 3
  61         movne   \curbit, #1                     @ no: divisor as is, curbit = 1
  62
  63         @ Unless the divisor is very big, shift it up in multiples of
  64         @ four bits, since this is the amount of unwinding in the main
  65         @ division loop.  Continue shifting until the divisor is
  66         @ larger than the dividend.
  67 1:      cmp     \divisor, #0x10000000           @ top nibble still empty?
  68         cmplo   \divisor, \dividend             @ ... and still below dividend?
  69         movlo   \divisor, \divisor, lsl #4      @ then move up one nibble
  70         movlo   \curbit, \curbit, lsl #4        @ keeping curbit in step
  71         blo     1b
  72
  73         @ For very big divisors, we must shift it a bit at a time, or
  74         @ we will be in danger of overflowing.
  75 1:      cmp     \divisor, #0x80000000           @ top bit still clear?
  76         cmplo   \divisor, \dividend             @ ... and still below dividend?
  77         movlo   \divisor, \divisor, lsl #1      @ then move up one bit
  78         movlo   \curbit, \curbit, lsl #1        @ keeping curbit in step
  79         blo     1b
  80
  81         mov     \result, #0                     @ quotient accumulates from 0
  82
  83 #endif
  84
  85         @ Division loop
             @ Each pass tries the divisor at shifts 0..3 below its current
             @ position: whenever it fits, subtract it and set the matching
             @ quotient bit, so one pass yields up to four quotient bits.
  86 1:      cmp     \dividend, \divisor
  87         subhs   \dividend, \dividend, \divisor
  88         orrhs   \result,   \result,   \curbit
  89         cmp     \dividend, \divisor,  lsr #1
  90         subhs   \dividend, \dividend, \divisor, lsr #1
  91         orrhs   \result,   \result,   \curbit,  lsr #1
  92         cmp     \dividend, \divisor,  lsr #2
  93         subhs   \dividend, \dividend, \divisor, lsr #2
  94         orrhs   \result,   \result,   \curbit,  lsr #2
  95         cmp     \dividend, \divisor,  lsr #3
  96         subhs   \dividend, \dividend, \divisor, lsr #3
  97         orrhs   \result,   \result,   \curbit,  lsr #3
  98         cmp     \dividend, #0                   @ Early termination?
  99         movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
 100         movne   \divisor,  \divisor, lsr #4     @ Yes, drop divisor a nibble too
 101         bne     1b
 102
 103 .endm
 104
 105
 106 .macro ARM_DIV2_ORDER divisor, order
             @ Turn a power-of-two divisor into its shift count.
             @   divisor : value to examine; overwritten on the pre-v5 path
             @   order   : receives the bit index of the set bit
             @ The callers visible in this file use it only once a tst/beq
             @ check has shown the divisor to be a power of two.
 107
 108 #if __LINUX_ARM_ARCH__ >= 5
 109
 110         clz     \order, \divisor                @ leading zeros ...
 111         rsb     \order, \order, #31             @ ... so order = 31 - clz
 112
 113 #else
 114
             @ Binary search: each step that finds the divisor at or above
             @ the tested bit shifts it down and adds that distance to order.
 115         cmp     \divisor, #(1 << 16)
 116         movhs   \divisor, \divisor, lsr #16
 117         movhs   \order, #16
 118         movlo   \order, #0
 119
 120         cmp     \divisor, #(1 << 8)
 121         movhs   \divisor, \divisor, lsr #8
 122         addhs   \order, \order, #8
 123
 124         cmp     \divisor, #(1 << 4)
 125         movhs   \divisor, \divisor, lsr #4
 126         addhs   \order, \order, #4
 127
             @ The divisor is below 16 here, so no further shifting is needed.
 128         cmp     \divisor, #(1 << 2)
 129         addhi   \order, \order, #3              @ above 4 (8 for a power of two)
 130         addls   \order, \order, \divisor, lsr #1        @ 1, 2 or 4: add 0, 1 or 2
 131
 132 #endif
 133
 134 .endm
 135
 136
 137 .macro ARM_MOD_BODY dividend, divisor, order, spare
             @ Unsigned 32-bit remainder core (shift-and-subtract, no quotient).
             @   dividend : numerator on entry; holds the remainder on exit
             @   divisor  : denominator on entry; shifted in place
             @   order    : scratch; counts how far the divisor was shifted up
             @   spare    : scratch; written only on the clz path
 138
 139 #if __LINUX_ARM_ARCH__ >= 5
 140
 141         clz     \order, \divisor                @ leading zeros of divisor
 142         clz     \spare, \dividend               @ leading zeros of dividend
 143         sub     \order, \order, \spare          @ order = shift that aligns them
 144         mov     \divisor, \divisor, lsl \order  @ line divisor up with dividend
 145
 146 #else
 147
 148         mov     \order, #0                      @ nothing shifted yet
 149
 150         @ Unless the divisor is very big, shift it up in multiples of
 151         @ four bits, since this is the amount of unwinding in the main
 152         @ division loop.  Continue shifting until the divisor is
 153         @ larger than the dividend.
 154 1:      cmp     \divisor, #0x10000000
 155         cmplo   \divisor, \dividend
 156         movlo   \divisor, \divisor, lsl #4
 157         addlo   \order, \order, #4              @ record the 4-bit shift
 158         blo     1b
 159
 160         @ For very big divisors, we must shift it a bit at a time, or
 161         @ we will be in danger of overflowing.
 162 1:      cmp     \divisor, #0x80000000
 163         cmplo   \divisor, \dividend
 164         movlo   \divisor, \divisor, lsl #1
 165         addlo   \order, \order, #1              @ record the 1-bit shift
 166         blo     1b
 167
 168 #endif
 169
 170         @ Perform all needed subtractions to keep only the remainder.
 171         @ Do comparisons in batches of 4 first.
             @ order + 1 compare/subtract steps are needed (one per shift
             @ position), so order - 3 is the number of steps beyond one full
             @ batch: negative means fewer than four steps exist.
 172         subs    \order, \order, #3              @ yes, 3 is intended here
 173         blt     2f
 174
 175 1:      cmp     \dividend, \divisor
 176         subhs   \dividend, \dividend, \divisor
 177         cmp     \dividend, \divisor,  lsr #1
 178         subhs   \dividend, \dividend, \divisor, lsr #1
 179         cmp     \dividend, \divisor,  lsr #2
 180         subhs   \dividend, \dividend, \divisor, lsr #2
 181         cmp     \dividend, \divisor,  lsr #3
 182         subhs   \dividend, \dividend, \divisor, lsr #3
 183         cmp     \dividend, #1                   @ lt when dividend is 0
 184         mov     \divisor, \divisor, lsr #4      @ next nibble; flags untouched
 185         subges  \order, \order, #4              @ else count off the 4 steps done
 186         bge     1b                              @ another full batch still fits
 187
 188         tst     \order, #3                      @ low bits clear: no steps left over
 189         teqne   \dividend, #0                   @ otherwise, is dividend already 0?
 190         beq     5f                              @ finished in either case
 191
 192         @ Either 1, 2 or 3 comparison/subtractions are left.
             @ order is -1, -2 or -3 here for 3, 2 or 1 remaining steps.
 193 2:      cmn     \order, #2
 194         blt     4f                              @ order == -3: one step
 195         beq     3f                              @ order == -2: two steps
 196         cmp     \dividend, \divisor
 197         subhs   \dividend, \dividend, \divisor
 198         mov     \divisor,  \divisor,  lsr #1
 199 3:      cmp     \dividend, \divisor
 200         subhs   \dividend, \dividend, \divisor
 201         mov     \divisor,  \divisor,  lsr #1
 202 4:      cmp     \dividend, \divisor
 203         subhs   \dividend, \dividend, \divisor
 204 5:
 205 .endm
 206
 207
 208 ENTRY(__udivsi3)
 209 ENTRY(__aeabi_uidiv)
     /*
      * Unsigned 32-bit division, exported under both names.
      *   In:   r0 = dividend, r1 = divisor
      *   Out:  r0 = quotient
      * May write r1, r2, r3 and the condition flags.
      * A zero divisor branches to Ldiv0.
      */
 210
 211         subs    r2, r1, #1                      @ r2 = divisor - 1, flags classify divisor
 212         moveq   pc, lr                          @ divisor == 1: r0 is already the quotient
 213         bcc     Ldiv0                           @ divisor == 0: the subtract borrowed
 214         cmp     r0, r1
 215         bls     11f                             @ dividend <= divisor: quotient is 1 or 0
 216         tst     r1, r2                          @ divisor & (divisor - 1)
 217         beq     12f                             @ zero: divisor is a power of 2
 218
             @ General case: dividend r0, divisor r1, quotient r2, scratch r3.
 219         ARM_DIV_BODY r0, r1, r2, r3
 220
 221         mov     r0, r2                          @ hand back the quotient
 222         mov     pc, lr
 223
             @ Flags here are still those of cmp r0, r1 above.
 224 11:     moveq   r0, #1                          @ dividend == divisor
 225         movne   r0, #0                          @ dividend < divisor
 226         mov     pc, lr
 227
             @ Power-of-2 divisor: r2 becomes the shift count.
 228 12:     ARM_DIV2_ORDER r1, r2
 229
 230         mov     r0, r0, lsr r2                  @ quotient = dividend >> shift count
 231         mov     pc, lr
 232
 233 ENDPROC(__udivsi3)
 234 ENDPROC(__aeabi_uidiv)
 235
 236 ENTRY(__umodsi3)
     /*
      * Unsigned 32-bit remainder.
      *   In:   r0 = dividend, r1 = divisor
      *   Out:  r0 = remainder
      * May write r1, r2, r3 and the condition flags.
      * A zero divisor branches to Ldiv0.
      * The conditional instructions below form one chain: each one reuses
      * the flags left by the previous compare or test.
      */
 237
 238         subs    r2, r1, #1                      @ compare divisor with 1
 239         bcc     Ldiv0                           @ divisor == 0: the subtract borrowed
 240         cmpne   r0, r1                          @ compare dividend with divisor
 241         moveq   r0, #0                          @ divisor == 1 or equal operands: 0
 242         tsthi   r1, r2                          @ see if divisor is power of 2
 243         andeq   r0, r0, r2                      @ power of 2: mask with divisor - 1
 244         movls   pc, lr                          @ done if a shortcut applied or dividend < divisor
 245
             @ General case: dividend r0 (becomes the remainder), divisor r1,
             @ scratch r2 and r3.
 246         ARM_MOD_BODY r0, r1, r2, r3
 247
 248         mov     pc, lr
 249
 250 ENDPROC(__umodsi3)
 251
 252 ENTRY(__divsi3)
 253 ENTRY(__aeabi_idiv)
     /*
      * Signed 32-bit division, exported under both names.
      *   In:   r0 = dividend, r1 = divisor
      *   Out:  r0 = quotient
      * The division runs on the magnitudes of the operands; ip keeps
      * r0 ^ r1 so that its top bit gives the sign of the result.
      * May write r1, r2, r3, ip and the condition flags.
      * A zero divisor branches to Ldiv0.
      */
 254
 255         cmp     r1, #0
 256         eor     ip, r0, r1                      @ save the sign of the result.
 257         beq     Ldiv0                           @ divisor == 0 (eor kept the cmp flags)
 258         rsbmi   r1, r1, #0                      @ loops below use unsigned.
 259         subs    r2, r1, #1                      @ division by 1 or -1 ?
 260         beq     10f
 261         movs    r3, r0
 262         rsbmi   r3, r0, #0                      @ positive dividend value
 263         cmp     r3, r1
 264         bls     11f                             @ dividend magnitude <= divisor magnitude
 265         tst     r1, r2                          @ divisor is power of 2 ?
 266         beq     12f
 267
             @ General case: dividend r3, divisor r1, quotient r0, scratch r2.
 268         ARM_DIV_BODY r3, r1, r0, r2
 269
 270         cmp     ip, #0                          @ result sign saved in ip
 271         rsbmi   r0, r0, #0                      @ negative: negate the quotient
 272         mov     pc, lr
 273
             @ Divisor is 1 or -1.  ip ^ r0 equals the original divisor, so
             @ mi after the teq means the divisor was -1.
 274 10:     teq     ip, r0                          @ same sign ?
 275         rsbmi   r0, r0, #0                      @ divisor was -1: negate dividend
 276         mov     pc, lr
 277
             @ Flags here are still those of cmp r3, r1 above.
 278 11:     movlo   r0, #0                          @ smaller magnitude: quotient 0
 279         moveq   r0, ip, asr #31                 @ equal magnitudes: 0 or -1 by sign
 280         orreq   r0, r0, #1                      @ ... which becomes +1 or -1
 281         mov     pc, lr
 282
             @ Power-of-2 divisor: r2 becomes the shift count.
 283 12:     ARM_DIV2_ORDER r1, r2
 284
 285         cmp     ip, #0                          @ result sign saved in ip
 286         mov     r0, r3, lsr r2                  @ shift the magnitude; flags kept
 287         rsbmi   r0, r0, #0                      @ negative: negate the quotient
 288         mov     pc, lr
 289
 290 ENDPROC(__divsi3)
 291 ENDPROC(__aeabi_idiv)
 292
 293 ENTRY(__modsi3)
     /*
      * Signed 32-bit remainder.
      *   In:   r0 = dividend, r1 = divisor
      *   Out:  r0 = remainder, negated when the dividend was negative
      * The remainder is computed on the magnitudes of the operands;
      * ip keeps the original dividend for the final sign fix-up.
      * May write r1, r2, r3, ip and the condition flags.
      * A zero divisor branches to Ldiv0.
      */
 294
 295         cmp     r1, #0
 296         beq     Ldiv0                           @ divisor == 0
 297         rsbmi   r1, r1, #0                      @ loops below use unsigned.
 298         movs    ip, r0                          @ preserve sign of dividend
 299         rsbmi   r0, r0, #0                      @ if negative make positive
 300         subs    r2, r1, #1                      @ compare divisor with 1
 301         cmpne   r0, r1                          @ compare dividend with divisor
 302         moveq   r0, #0                          @ divisor magnitude 1 or equal magnitudes: 0
 303         tsthi   r1, r2                          @ see if divisor is power of 2
 304         andeq   r0, r0, r2                      @ power of 2: mask with divisor - 1
 305         bls     10f                             @ shortcut applied or dividend smaller
 306
             @ General case: dividend r0 (becomes the remainder), divisor r1,
             @ scratch r2 and r3.
 307         ARM_MOD_BODY r0, r1, r2, r3
 308
 309 10:     cmp     ip, #0                          @ was the original dividend negative?
 310         rsbmi   r0, r0, #0                      @ yes: negate the remainder
 311         mov     pc, lr
 312
 313 ENDPROC(__modsi3)
 314
 315 #ifdef CONFIG_AEABI
 316
 317 ENTRY(__aeabi_uidivmod)
     /*
      * Unsigned division returning quotient and remainder together.
      *   In:   r0 = dividend, r1 = divisor
      *   Out:  r0 = quotient, r1 = dividend - quotient * divisor
      * Also writes r2, r3 and whatever __aeabi_uidiv writes.
      */
 318
 319         stmfd   sp!, {r0, r1, ip, lr}           @ keep operands, ip and lr across the call
 320         bl      __aeabi_uidiv                   @ r0 = quotient
 321         ldmfd   sp!, {r1, r2, ip, lr}           @ r1 = saved dividend, r2 = saved divisor
 322         mul     r3, r0, r2                      @ r3 = quotient * divisor
 323         sub     r1, r1, r3                      @ r1 = remainder
 324         mov     pc, lr
 325
 326 ENDPROC(__aeabi_uidivmod)
 327
 328 ENTRY(__aeabi_idivmod)
     /*
      * Signed division returning quotient and remainder together.
      *   In:   r0 = dividend, r1 = divisor
      *   Out:  r0 = quotient, r1 = dividend - quotient * divisor
      * Also writes r2, r3 and whatever __aeabi_idiv writes.
      */
 329
 330         stmfd   sp!, {r0, r1, ip, lr}           @ keep operands, ip and lr across the call
 331         bl      __aeabi_idiv                    @ r0 = quotient
 332         ldmfd   sp!, {r1, r2, ip, lr}           @ r1 = saved dividend, r2 = saved divisor
 333         mul     r3, r0, r2                      @ r3 = quotient * divisor
 334         sub     r1, r1, r3                      @ r1 = remainder
 335         mov     pc, lr
 336
 337 ENDPROC(__aeabi_idivmod)
 338
 339 #endif
 340
 341 Ldiv0:
     /*
      * Common divide-by-zero path, branched to by the division and
      * remainder routines in this file.  Calls __div0 (not defined in
      * the code visible here), then returns 0 in r0 through the lr that
      * was live when Ldiv0 was entered.
      */
 342
 343         str     lr, [sp, #-8]!          @ push lr; sp drops 8 bytes for the one word stored
 344         bl      __div0
 345         mov     r0, #0                  @ About as wrong as it could be.
 346         ldr     pc, [sp], #8            @ reload saved lr into pc, release the 8 bytes
 347
 348