Commit | Line | Data |
---|---|---|
3b041227 TY |
1 | /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, |
2 | 2004, 2005, 2006 | |
3 | Free Software Foundation, Inc. | |
4 | ||
5 | This file is free software; you can redistribute it and/or modify it | |
6 | under the terms of the GNU General Public License as published by the | |
7 | Free Software Foundation; either version 2, or (at your option) any | |
8 | later version. | |
9 | ||
10 | In addition to the permissions in the GNU General Public License, the | |
11 | Free Software Foundation gives you unlimited permission to link the | |
12 | compiled version of this file into combinations with other programs, | |
13 | and to distribute those combinations without any restriction coming | |
14 | from the use of this file. (The General Public License restrictions | |
15 | do apply in other respects; for example, they cover modification of | |
16 | the file, and distribution when not linked into a combine | |
17 | executable.) | |
18 | ||
19 | This file is distributed in the hope that it will be useful, but | |
20 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
22 | General Public License for more details. | |
23 | ||
24 | You should have received a copy of the GNU General Public License | |
25 | along with this program; see the file COPYING. If not, write to | |
26 | the Free Software Foundation, 51 Franklin Street, Fifth Floor, | |
27 | Boston, MA 02110-1301, USA. */ | |
28 | ||
29 | !! libgcc routines for the Renesas / SuperH SH CPUs. | |
30 | !! Contributed by Steve Chamberlain. | |
31 | !! sac@cygnus.com | |
32 | ||
33 | !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines | |
34 | !! recoded in assembly by Toshiyasu Morita | |
35 | !! tm@netcom.com | |
36 | ||
37 | /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and | |
38 | ELF local label prefixes by J"orn Rennecke | |
39 | amylaar@cygnus.com */ | |
40 | ||
41 | /* This code uses shld, and is therefore not suitable for SH1 / SH2. */ | |
42 | ||
43 | /* Signed / unsigned division without use of FPU, optimized for SH4. | |
44 | Uses a lookup table for divisors in the range -128 .. +128, and | |
45 | div1 with case distinction for larger divisors in three more ranges. | |
46 | The code is lumped together with the table to allow the use of mova. */ | |
47 | #ifdef CONFIG_CPU_LITTLE_ENDIAN /* byte offsets inside a 32-bit word in memory, chosen per endianness */ | |
48 | #define L_LSB 0 /* least significant byte (not referenced in the code visible here) */ | |
49 | #define L_LSWMSB 1 /* upper byte of the low 16-bit half; used with mov.b into the stack scratch word */ | |
50 | #define L_MSWLSB 2 /* lower byte of the high 16-bit half; used with mov.b into the stack scratch word */ | |
51 | #else /* big-endian: the same logical bytes at mirrored offsets */ | |
52 | #define L_LSB 3 | |
53 | #define L_LSWMSB 2 | |
54 | #define L_MSWLSB 1 | |
55 | #endif | |
56 | ||
57 | .balign 4 /* __udivsi3_i4i: unsigned 32-bit division; dividend in r4, divisor in r5, quotient returned in r0 */ | |
58 | .global __udivsi3_i4i | |
59 | .global __udivsi3_i4 | |
60 | .set __udivsi3_i4, __udivsi3_i4i /* __udivsi3_i4 is an alias for the same entry point */ | |
61 | .type __udivsi3_i4i, @function | |
62 | __udivsi3_i4i: /* r4/r5 are pushed and restored on every path; r1 is scratch; the table path also writes MACH/MACL */ | |
63 | mov.w c128_w, r1 /* r1 = 128, the largest divisor handled by table lookup */ | |
64 | div0u /* set up an unsigned div1 sequence */ | |
65 | mov r4,r0 | |
66 | shlr8 r0 /* r0 = dividend >> 8 */ | |
67 | cmp/hi r1,r5 /* T = (divisor > 128), unsigned */ | |
68 | extu.w r5,r1 /* r1 = low 16 bits of the divisor */ | |
69 | bf udiv_le128 /* divisor <= 128: multiply by a table inverse */ | |
70 | cmp/eq r5,r1 /* T = (divisor fits in 16 bits) */ | |
71 | bf udiv_ge64k /* divisor >= 64K */ | |
72 | shlr r0 /* 128 < divisor < 64K: r0 = dividend >> 9, shifted-out bit lands in T */ | |
73 | mov r5,r1 /* keep the unshifted divisor so it can be saved below */ | |
74 | shll16 r5 /* move the divisor into the upper half for div1 */ | |
75 | mov.l r4,@-r15 /* save caller's r4 */ | |
76 | div1 r5,r0 | |
77 | mov.l r1,@-r15 /* save caller's r5 (the original divisor) */ | |
78 | div1 r5,r0 | |
79 | div1 r5,r0 | |
80 | bra udiv_25 /* continue in the 16-bit-divisor code shared with __sdivsi3_i4i */ | |
81 | div1 r5,r0 /* delay slot */ | |
82 | ||
83 | div_le128: /* entry from __sdivsi3_i4i: divisor magnitude <= 128, r4/r5 already pushed */ | |
84 | mova div_table_ix,r0 | |
85 | bra div_le128_2 | |
86 | mov.b @(r0,r5),r1 /* delay slot: r1 = div_table_ix[divisor], a signed byte offset */ | |
87 | udiv_le128: /* unsigned entry: divisor <= 128 */ | |
88 | mov.l r4,@-r15 /* save caller's r4 */ | |
89 | mova div_table_ix,r0 | |
90 | mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], a signed byte offset */ | |
91 | mov.l r5,@-r15 /* save caller's r5 */ | |
92 | div_le128_2: | |
93 | mova div_table_inv,r0 | |
94 | mov.l @(r0,r1),r1 /* r1 = normalized inverse loaded from div_table_inv + offset */ | |
95 | mov r5,r0 | |
96 | tst #0xfe,r0 /* T = ((divisor & 0xfe) == 0) */ | |
97 | mova div_table_clz,r0 | |
98 | dmulu.l r1,r4 /* MACH:MACL = inverse * dividend (unsigned 64-bit product) */ | |
99 | mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */ | |
100 | bt/s div_by_1 /* taken when T is set: the dividend is returned unchanged */ | |
101 | mov r4,r0 /* delay slot: r0 = dividend */ | |
102 | mov.l @r15+,r5 /* restore r5 */ | |
103 | sts mach,r0 /* r0 = high 32 bits of the product */ | |
104 | /* clrt -- not needed: T is already 0 because the bt/s above was not taken */ | |
105 | addc r4,r0 /* add the dividend (the inverse has an implicit leading 1); carry goes to T */ | |
106 | mov.l @r15+,r4 /* restore r4 */ | |
107 | rotcr r0 /* rotate right through T, bringing the carry into bit 31 */ | |
108 | rts | |
109 | shld r1,r0 /* delay slot: shift by the table count (a negative count shifts right) */ | |
110 | ||
111 | div_by_1_neg: /* result is the negated dividend */ | |
112 | neg r4,r0 | |
113 | div_by_1: /* r0 already holds the result; restore registers and return */ | |
114 | mov.l @r15+,r5 | |
115 | rts | |
116 | mov.l @r15+,r4 /* delay slot */ | |
117 | ||
118 | div_ge64k: /* entry from __sdivsi3_i4i: divisor >= 64K; T = (divisor > dividend >> 8) set in the caller's delay slot */ | |
119 | bt/s div_r8 /* T set: take the path that runs only 8 div1 steps */ | |
120 | div0u /* delay slot */ | |
121 | shll8 r5 | |
122 | bra div_ge64k_2 | |
123 | div1 r5,r0 /* delay slot */ | |
124 | udiv_ge64k: /* unsigned entry: divisor >= 64K */ | |
125 | cmp/hi r0,r5 /* T = (divisor > dividend >> 8) */ | |
126 | mov r5,r1 /* keep the unshifted divisor so it can be saved below */ | |
127 | bt udiv_r8 | |
128 | shll8 r5 | |
129 | mov.l r4,@-r15 /* save caller's r4 */ | |
130 | div1 r5,r0 | |
131 | mov.l r1,@-r15 /* save caller's r5 (the original divisor) */ | |
132 | div_ge64k_2: | |
133 | div1 r5,r0 | |
134 | mov.l zero_l,r1 /* r1 = 0 */ | |
135 | .rept 4 | |
136 | div1 r5,r0 | |
137 | .endr | |
138 | mov.l r1,@-r15 /* push a zeroed scratch word; it is popped again at div_ge64k_end */ | |
139 | div1 r5,r0 | |
140 | mov.w m256_w,r1 /* r1 = 0xff00 loaded as a sign-extended word, i.e. a mask for bits 31..8 */ | |
141 | div1 r5,r0 | |
142 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into the scratch word */ | |
143 | xor r4,r0 | |
144 | and r1,r0 | |
145 | bra div_ge64k_end /* final 8 div1 steps and epilogue live in the shared tail */ | |
146 | xor r4,r0 /* delay slot: xor/and/xor keeps bits 31..8 of r0 and takes the low byte from r4 */ | |
147 | ||
148 | div_r8: /* entry from __sdivsi3_i4i for the 8-step path; r4/r5 already pushed */ | |
149 | shll16 r4 | |
150 | bra div_r8_2 | |
151 | shll8 r4 /* delay slot: r4 = dividend << 24 */ | |
152 | udiv_r8: /* unsigned entry for the 8-step path */ | |
153 | mov.l r4,@-r15 /* save caller's r4 */ | |
154 | shll16 r4 | |
155 | clrt | |
156 | shll8 r4 /* r4 = dividend << 24 */ | |
157 | mov.l r5,@-r15 /* save caller's r5 */ | |
158 | div_r8_2: | |
159 | rotcl r4 | |
160 | mov r0,r1 /* r1 = dividend >> 8, the div1 working register on this path */ | |
161 | div1 r5,r1 | |
162 | mov r4,r0 /* r0 collects the rotated bits from here on */ | |
163 | rotcl r0 | |
164 | mov r5,r4 /* copy of the divisor for the last div1, issued after r5 is restored */ | |
165 | div1 r5,r1 | |
166 | .rept 5 | |
167 | rotcl r0; div1 r5,r1 | |
168 | .endr | |
169 | rotcl r0 | |
170 | mov.l @r15+,r5 /* restore r5 */ | |
171 | div1 r4,r1 | |
172 | mov.l @r15+,r4 /* restore r4 */ | |
173 | rts | |
174 | rotcl r0 /* delay slot: rotate the final T bit into r0 */ | |
175 | ||
176 | .global __sdivsi3_i4i /* __sdivsi3_i4i: signed 32-bit division; dividend in r4, divisor in r5, quotient returned in r0 */ | |
177 | .global __sdivsi3_i4 | |
178 | .global __sdivsi3 | |
179 | .set __sdivsi3_i4, __sdivsi3_i4i /* alias for the same entry point */ | |
180 | .set __sdivsi3, __sdivsi3_i4i /* alias for the same entry point */ | |
181 | .type __sdivsi3_i4i, @function | |
182 | /* This is link-compatible with a __sdivsi3 call, | |
183 | but we effectively clobber only r1. */ | |
184 | __sdivsi3_i4i: /* r4/r5 are pushed and restored on every path; the table path also writes MACH/MACL */ | |
185 | mov.l r4,@-r15 /* save caller's r4 */ | |
186 | cmp/pz r5 /* T = (divisor >= 0) */ | |
187 | mov.w c128_w, r1 /* r1 = 128, the largest divisor magnitude handled by table lookup */ | |
188 | bt/s pos_divisor | |
189 | cmp/pz r4 /* delay slot: T = (dividend >= 0) */ | |
190 | mov.l r5,@-r15 /* negative divisor: save caller's r5 */ | |
191 | neg r5,r5 /* r5 = -divisor */ | |
192 | bt/s neg_result /* dividend >= 0 with a negative divisor: the quotient gets negated */ | |
193 | cmp/hi r1,r5 /* delay slot: T = (r5 > 128), unsigned */ | |
194 | neg r4,r4 /* both operands negative: negate the dividend too, quotient is used as computed */ | |
195 | pos_result: /* r4 and r5 have been negated where they were negative; quotient is returned without negation */ | |
196 | extu.w r5,r0 /* r0 = low 16 bits of the divisor */ | |
197 | bf div_le128 /* divisor <= 128: table path */ | |
198 | cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */ | |
199 | mov r4,r0 | |
200 | shlr8 r0 /* r0 = dividend >> 8 */ | |
201 | bf/s div_ge64k /* divisor >= 64K */ | |
202 | cmp/hi r0,r5 /* delay slot: T = (divisor > dividend >> 8) */ | |
203 | div0u /* set up an unsigned div1 sequence */ | |
204 | shll16 r5 /* move the divisor into the upper half for div1 */ | |
205 | div1 r5,r0 | |
206 | div1 r5,r0 | |
207 | div1 r5,r0 | |
208 | udiv_25: /* also entered from __udivsi3_i4i for divisors above 128 that fit in 16 bits */ | |
209 | mov.l zero_l,r1 /* r1 = 0 */ | |
210 | div1 r5,r0 | |
211 | div1 r5,r0 | |
212 | mov.l r1,@-r15 /* push a zeroed scratch word */ | |
213 | .rept 3 | |
214 | div1 r5,r0 | |
215 | .endr | |
216 | mov.b r0,@(L_MSWLSB,r15) /* store the low byte of r0 into the scratch word */ | |
217 | xtrct r4,r0 /* r0 = low half of r4 in the upper 16 bits, old upper half of r0 in the lower 16 bits */ | |
218 | swap.w r0,r0 /* exchange the two 16-bit halves of r0 */ | |
219 | .rept 8 | |
220 | div1 r5,r0 | |
221 | .endr | |
222 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into the scratch word */ | |
223 | div_ge64k_end: /* shared tail: last 8 div1 steps, merge with the scratch word, restore and return */ | |
224 | .rept 8 | |
225 | div1 r5,r0 | |
226 | .endr | |
227 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
228 | extu.b r0,r0 /* keep only the low byte of r0 */ | |
229 | mov.l @r15+,r5 /* restore r5 */ | |
230 | or r4,r0 /* merge in the bytes collected in the scratch word (popped into r4 above) */ | |
231 | mov.l @r15+,r4 /* restore r4 */ | |
232 | rts | |
233 | rotcl r0 /* delay slot: rotate the final T bit into r0 */ | |
234 | ||
235 | div_le128_neg: /* divisor magnitude <= 128, quotient to be negated; r0 = low 16 bits of r5 on entry */ | |
236 | tst #0xfe,r0 /* T = ((divisor & 0xfe) == 0) */ | |
237 | mova div_table_ix,r0 | |
238 | mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], a signed byte offset */ | |
239 | mova div_table_inv,r0 | |
240 | bt/s div_by_1_neg /* taken when T is set: return the negated dividend */ | |
241 | mov.l @(r0,r1),r1 /* delay slot: r1 = normalized inverse loaded from div_table_inv + offset */ | |
242 | mova div_table_clz,r0 | |
243 | dmulu.l r1,r4 /* MACH:MACL = inverse * dividend (unsigned 64-bit product) */ | |
244 | mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */ | |
245 | mov.l @r15+,r5 /* restore r5 */ | |
246 | sts mach,r0 /* r0 = high 32 bits of the product */ | |
247 | /* clrt -- not needed: T is already 0 because the bt/s above was not taken */ | |
248 | addc r4,r0 /* add the dividend (the inverse has an implicit leading 1); carry goes to T */ | |
249 | mov.l @r15+,r4 /* restore r4 */ | |
250 | rotcr r0 /* rotate right through T, bringing the carry into bit 31 */ | |
251 | shld r1,r0 /* shift by the table count (a negative count shifts right) */ | |
252 | rts | |
253 | neg r0,r0 /* delay slot: negate the quotient */ | |
254 | ||
255 | pos_divisor: /* divisor >= 0; T = (dividend >= 0) from the delay slot of the branch that came here */ | |
256 | mov.l r5,@-r15 /* save caller's r5 */ | |
257 | bt/s pos_result | |
258 | cmp/hi r1,r5 /* delay slot: T = (divisor > 128), unsigned */ | |
259 | neg r4,r4 /* negative dividend: negate it, the quotient is negated at the end */ | |
260 | neg_result: /* same structure as pos_result, but every exit negates the quotient */ | |
261 | extu.w r5,r0 /* r0 = low 16 bits of the divisor */ | |
262 | bf div_le128_neg /* divisor <= 128: table path */ | |
263 | cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */ | |
264 | mov r4,r0 | |
265 | shlr8 r0 /* r0 = dividend >> 8 */ | |
266 | bf/s div_ge64k_neg /* divisor >= 64K */ | |
267 | cmp/hi r0,r5 /* delay slot: T = (divisor > dividend >> 8) */ | |
268 | div0u /* set up an unsigned div1 sequence */ | |
269 | mov.l zero_l,r1 /* r1 = 0 */ | |
270 | shll16 r5 /* move the divisor into the upper half for div1 */ | |
271 | div1 r5,r0 | |
272 | mov.l r1,@-r15 /* push a zeroed scratch word */ | |
273 | .rept 7 | |
274 | div1 r5,r0 | |
275 | .endr | |
276 | mov.b r0,@(L_MSWLSB,r15) /* store the low byte of r0 into the scratch word */ | |
277 | xtrct r4,r0 /* r0 = low half of r4 in the upper 16 bits, old upper half of r0 in the lower 16 bits */ | |
278 | swap.w r0,r0 /* exchange the two 16-bit halves of r0 */ | |
279 | .rept 8 | |
280 | div1 r5,r0 | |
281 | .endr | |
282 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into the scratch word */ | |
283 | div_ge64k_neg_end: /* tail for negated results: last 8 div1 steps, merge, restore, negate */ | |
284 | .rept 8 | |
285 | div1 r5,r0 | |
286 | .endr | |
287 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
288 | extu.b r0,r1 /* r1 = low byte of r0 */ | |
289 | mov.l @r15+,r5 /* restore r5 */ | |
290 | or r4,r1 /* merge in the bytes collected in the scratch word (popped into r4 above) */ | |
291 | div_r8_neg_end: /* also reached from div_r8_neg with the partial quotient in r1 */ | |
292 | mov.l @r15+,r4 /* restore r4 */ | |
293 | rotcl r1 /* rotate the final T bit into r1 */ | |
294 | rts | |
295 | neg r1,r0 /* delay slot: r0 = negated quotient */ | |
296 | ||
297 | div_ge64k_neg: /* divisor >= 64K, quotient to be negated; T = (divisor > dividend >> 8) */ | |
298 | bt/s div_r8_neg /* T set: take the path that runs only 8 div1 steps */ | |
299 | div0u /* delay slot */ | |
300 | shll8 r5 | |
301 | mov.l zero_l,r1 /* r1 = 0 */ | |
302 | .rept 6 | |
303 | div1 r5,r0 | |
304 | .endr | |
305 | mov.l r1,@-r15 /* push a zeroed scratch word */ | |
306 | div1 r5,r0 | |
307 | mov.w m256_w,r1 /* r1 = 0xff00 loaded as a sign-extended word, i.e. a mask for bits 31..8 */ | |
308 | div1 r5,r0 | |
309 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into the scratch word */ | |
310 | xor r4,r0 | |
311 | and r1,r0 | |
312 | bra div_ge64k_neg_end | |
313 | xor r4,r0 /* delay slot: xor/and/xor keeps bits 31..8 of r0 and takes the low byte from r4 */ | |
314 | ||
315 | c128_w: /* 16-bit literal loaded with mov.w at both entry points; sits between code paths, after an unconditional branch */ | |
316 | .word 128 | |
317 | ||
318 | div_r8_neg: /* 8-step path with negated quotient; r0 = dividend >> 8, r4/r5 already pushed */ | |
319 | clrt | |
320 | shll16 r4 | |
321 | mov r4,r1 | |
322 | shll8 r1 /* r1 = dividend << 24; it collects the rotated bits */ | |
323 | mov r5,r4 /* copy of the divisor for the last div1, issued after r5 is restored */ | |
324 | .rept 7 | |
325 | rotcl r1; div1 r5,r0 | |
326 | .endr | |
327 | mov.l @r15+,r5 /* restore r5 */ | |
328 | rotcl r1 | |
329 | bra div_r8_neg_end /* shared exit restores r4 and negates */ | |
330 | div1 r4,r0 /* delay slot: eighth div1 step, using the divisor copy in r4 */ | |
331 | ||
332 | m256_w: /* 16-bit literal loaded with mov.w in the divisor >= 64K paths and used there as an AND mask */ | |
333 | .word 0xff00 | |
334 | /* This table has been generated by divtab-sh4.c. */ | |
335 | .balign 4 | |
336 | div_table_clz: /* byte table indexed by the divisor (mov.b at div_table_clz + r5); each entry is the shld count applied to the scaled product. Holds indices 0..127; index 128 reads the first byte of div_table_ix. */ | |
337 | .byte 0 | |
338 | .byte 1 | |
339 | .byte 0 | |
340 | .byte -1 | |
341 | .byte -1 | |
342 | .byte -2 | |
343 | .byte -2 | |
344 | .byte -2 | |
345 | .byte -2 | |
346 | .byte -3 | |
347 | .byte -3 | |
348 | .byte -3 | |
349 | .byte -3 | |
350 | .byte -3 | |
351 | .byte -3 | |
352 | .byte -3 | |
353 | .byte -3 | |
354 | .byte -4 | |
355 | .byte -4 | |
356 | .byte -4 | |
357 | .byte -4 | |
358 | .byte -4 | |
359 | .byte -4 | |
360 | .byte -4 | |
361 | .byte -4 | |
362 | .byte -4 | |
363 | .byte -4 | |
364 | .byte -4 | |
365 | .byte -4 | |
366 | .byte -4 | |
367 | .byte -4 | |
368 | .byte -4 | |
369 | .byte -4 | |
370 | .byte -5 | |
371 | .byte -5 | |
372 | .byte -5 | |
373 | .byte -5 | |
374 | .byte -5 | |
375 | .byte -5 | |
376 | .byte -5 | |
377 | .byte -5 | |
378 | .byte -5 | |
379 | .byte -5 | |
380 | .byte -5 | |
381 | .byte -5 | |
382 | .byte -5 | |
383 | .byte -5 | |
384 | .byte -5 | |
385 | .byte -5 | |
386 | .byte -5 | |
387 | .byte -5 | |
388 | .byte -5 | |
389 | .byte -5 | |
390 | .byte -5 | |
391 | .byte -5 | |
392 | .byte -5 | |
393 | .byte -5 | |
394 | .byte -5 | |
395 | .byte -5 | |
396 | .byte -5 | |
397 | .byte -5 | |
398 | .byte -5 | |
399 | .byte -5 | |
400 | .byte -5 | |
401 | .byte -5 | |
402 | .byte -6 | |
403 | .byte -6 | |
404 | .byte -6 | |
405 | .byte -6 | |
406 | .byte -6 | |
407 | .byte -6 | |
408 | .byte -6 | |
409 | .byte -6 | |
410 | .byte -6 | |
411 | .byte -6 | |
412 | .byte -6 | |
413 | .byte -6 | |
414 | .byte -6 | |
415 | .byte -6 | |
416 | .byte -6 | |
417 | .byte -6 | |
418 | .byte -6 | |
419 | .byte -6 | |
420 | .byte -6 | |
421 | .byte -6 | |
422 | .byte -6 | |
423 | .byte -6 | |
424 | .byte -6 | |
425 | .byte -6 | |
426 | .byte -6 | |
427 | .byte -6 | |
428 | .byte -6 | |
429 | .byte -6 | |
430 | .byte -6 | |
431 | .byte -6 | |
432 | .byte -6 | |
433 | .byte -6 | |
434 | .byte -6 | |
435 | .byte -6 | |
436 | .byte -6 | |
437 | .byte -6 | |
438 | .byte -6 | |
439 | .byte -6 | |
440 | .byte -6 | |
441 | .byte -6 | |
442 | .byte -6 | |
443 | .byte -6 | |
444 | .byte -6 | |
445 | .byte -6 | |
446 | .byte -6 | |
447 | .byte -6 | |
448 | .byte -6 | |
449 | .byte -6 | |
450 | .byte -6 | |
451 | .byte -6 | |
452 | .byte -6 | |
453 | .byte -6 | |
454 | .byte -6 | |
455 | .byte -6 | |
456 | .byte -6 | |
457 | .byte -6 | |
458 | .byte -6 | |
459 | .byte -6 | |
460 | .byte -6 | |
461 | .byte -6 | |
462 | .byte -6 | |
463 | .byte -6 | |
464 | .byte -6 | |
465 | /* Lookup table translating positive divisor to index into table of | |
466 | normalized inverse. N.B. the '0' entry is also the last entry of the | |
467 | previous table, and causes an unaligned access for division by zero. */ | |
468 | div_table_ix: /* entries are signed byte offsets added to div_table_inv; apart from the '0' entry they are multiples of 4 in the range -128..124 */ | |
469 | .byte -6 | |
470 | .byte -128 | |
471 | .byte -128 | |
472 | .byte 0 | |
473 | .byte -128 | |
474 | .byte -64 | |
475 | .byte 0 | |
476 | .byte 64 | |
477 | .byte -128 | |
478 | .byte -96 | |
479 | .byte -64 | |
480 | .byte -32 | |
481 | .byte 0 | |
482 | .byte 32 | |
483 | .byte 64 | |
484 | .byte 96 | |
485 | .byte -128 | |
486 | .byte -112 | |
487 | .byte -96 | |
488 | .byte -80 | |
489 | .byte -64 | |
490 | .byte -48 | |
491 | .byte -32 | |
492 | .byte -16 | |
493 | .byte 0 | |
494 | .byte 16 | |
495 | .byte 32 | |
496 | .byte 48 | |
497 | .byte 64 | |
498 | .byte 80 | |
499 | .byte 96 | |
500 | .byte 112 | |
501 | .byte -128 | |
502 | .byte -120 | |
503 | .byte -112 | |
504 | .byte -104 | |
505 | .byte -96 | |
506 | .byte -88 | |
507 | .byte -80 | |
508 | .byte -72 | |
509 | .byte -64 | |
510 | .byte -56 | |
511 | .byte -48 | |
512 | .byte -40 | |
513 | .byte -32 | |
514 | .byte -24 | |
515 | .byte -16 | |
516 | .byte -8 | |
517 | .byte 0 | |
518 | .byte 8 | |
519 | .byte 16 | |
520 | .byte 24 | |
521 | .byte 32 | |
522 | .byte 40 | |
523 | .byte 48 | |
524 | .byte 56 | |
525 | .byte 64 | |
526 | .byte 72 | |
527 | .byte 80 | |
528 | .byte 88 | |
529 | .byte 96 | |
530 | .byte 104 | |
531 | .byte 112 | |
532 | .byte 120 | |
533 | .byte -128 | |
534 | .byte -124 | |
535 | .byte -120 | |
536 | .byte -116 | |
537 | .byte -112 | |
538 | .byte -108 | |
539 | .byte -104 | |
540 | .byte -100 | |
541 | .byte -96 | |
542 | .byte -92 | |
543 | .byte -88 | |
544 | .byte -84 | |
545 | .byte -80 | |
546 | .byte -76 | |
547 | .byte -72 | |
548 | .byte -68 | |
549 | .byte -64 | |
550 | .byte -60 | |
551 | .byte -56 | |
552 | .byte -52 | |
553 | .byte -48 | |
554 | .byte -44 | |
555 | .byte -40 | |
556 | .byte -36 | |
557 | .byte -32 | |
558 | .byte -28 | |
559 | .byte -24 | |
560 | .byte -20 | |
561 | .byte -16 | |
562 | .byte -12 | |
563 | .byte -8 | |
564 | .byte -4 | |
565 | .byte 0 | |
566 | .byte 4 | |
567 | .byte 8 | |
568 | .byte 12 | |
569 | .byte 16 | |
570 | .byte 20 | |
571 | .byte 24 | |
572 | .byte 28 | |
573 | .byte 32 | |
574 | .byte 36 | |
575 | .byte 40 | |
576 | .byte 44 | |
577 | .byte 48 | |
578 | .byte 52 | |
579 | .byte 56 | |
580 | .byte 60 | |
581 | .byte 64 | |
582 | .byte 68 | |
583 | .byte 72 | |
584 | .byte 76 | |
585 | .byte 80 | |
586 | .byte 84 | |
587 | .byte 88 | |
588 | .byte 92 | |
589 | .byte 96 | |
590 | .byte 100 | |
591 | .byte 104 | |
592 | .byte 108 | |
593 | .byte 112 | |
594 | .byte 116 | |
595 | .byte 120 | |
596 | .byte 124 | |
597 | .byte -128 | |
598 | /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ | |
599 | .balign 4 | |
600 | zero_l: /* start of the inverse table; its first word (0) is also loaded with mov.l zero_l,r1 as a zero constant by the div1 paths */ | |
601 | .long 0x0 | |
602 | .long 0xF81F81F9 | |
603 | .long 0xF07C1F08 | |
604 | .long 0xE9131AC0 | |
605 | .long 0xE1E1E1E2 | |
606 | .long 0xDAE6076C | |
607 | .long 0xD41D41D5 | |
608 | .long 0xCD856891 | |
609 | .long 0xC71C71C8 | |
610 | .long 0xC0E07039 | |
611 | .long 0xBACF914D | |
612 | .long 0xB4E81B4F | |
613 | .long 0xAF286BCB | |
614 | .long 0xA98EF607 | |
615 | .long 0xA41A41A5 | |
616 | .long 0x9EC8E952 | |
617 | .long 0x9999999A | |
618 | .long 0x948B0FCE | |
619 | .long 0x8F9C18FA | |
620 | .long 0x8ACB90F7 | |
621 | .long 0x86186187 | |
622 | .long 0x81818182 | |
623 | .long 0x7D05F418 | |
624 | .long 0x78A4C818 | |
625 | .long 0x745D1746 | |
626 | .long 0x702E05C1 | |
627 | .long 0x6C16C16D | |
628 | .long 0x68168169 | |
629 | .long 0x642C8591 | |
630 | .long 0x60581606 | |
631 | .long 0x5C9882BA | |
632 | .long 0x58ED2309 | |
633 | div_table_inv: /* base address used with the signed offsets from div_table_ix: 32 words sit before this label and 32 after it */ | |
634 | .long 0x55555556 | |
635 | .long 0x51D07EAF | |
636 | .long 0x4E5E0A73 | |
637 | .long 0x4AFD6A06 | |
638 | .long 0x47AE147B | |
639 | .long 0x446F8657 | |
640 | .long 0x41414142 | |
641 | .long 0x3E22CBCF | |
642 | .long 0x3B13B13C | |
643 | .long 0x38138139 | |
644 | .long 0x3521CFB3 | |
645 | .long 0x323E34A3 | |
646 | .long 0x2F684BDB | |
647 | .long 0x2C9FB4D9 | |
648 | .long 0x29E4129F | |
649 | .long 0x27350B89 | |
650 | .long 0x24924925 | |
651 | .long 0x21FB7813 | |
652 | .long 0x1F7047DD | |
653 | .long 0x1CF06ADB | |
654 | .long 0x1A7B9612 | |
655 | .long 0x18118119 | |
656 | .long 0x15B1E5F8 | |
657 | .long 0x135C8114 | |
658 | .long 0x11111112 | |
659 | .long 0xECF56BF | |
660 | .long 0xC9714FC | |
661 | .long 0xA6810A7 | |
662 | .long 0x8421085 | |
663 | .long 0x624DD30 | |
664 | .long 0x4104105 | |
665 | .long 0x2040811 | |
666 | /* maximum error: 0.987342 scaled: 0.921875*/ |