Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /*---------------------------------------------------------------------------+ |
2 | | polynomial_Xsig.S | | |
3 | | | | |
4 | | Fixed point arithmetic polynomial evaluation. | | |
5 | | | | |
6 | | Copyright (C) 1992,1993,1994,1995 | | |
7 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | |
8 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | |
9 | | | | |
10 | | Call from C as: | | |
11 | | void polynomial_Xsig(Xsig *accum, unsigned long long x, | | |
12 | | unsigned long long terms[], int n) | | |
13 | | | | |
14 | | Computes: | | |
15 | | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | | |
16 | | and adds the result to the 12 byte Xsig. | | |
17 | | The terms[] are each 8 bytes, but all computation is performed to 12 byte | | |
18 | | precision. | | |
19 | | | | |
20 | | This function must be used carefully: most overflow of intermediate | | |
21 | | results is controlled, but overflow of the result is not. | | |
22 | | | | |
23 | +---------------------------------------------------------------------------*/ | |
24 | .file "polynomial_Xsig.S" | |
25 | ||
26 | #include "fpu_emu.h" | |
27 | ||
28 | ||
29 | #define TERM_SIZE $8 | |
30 | #define SUM_MS -20(%ebp) /* sum ms long */ | |
31 | #define SUM_MIDDLE -24(%ebp) /* sum middle long */ | |
32 | #define SUM_LS -28(%ebp) /* sum ls long */ | |
33 | #define ACCUM_MS -4(%ebp) /* accum ms long */ | |
34 | #define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ | |
35 | #define ACCUM_LS -12(%ebp) /* accum ls long */ | |
36 | #define OVERFLOWED -16(%ebp) /* addition overflow flag */ | |
37 | ||
38 | .text | |
39 | ENTRY(polynomial_Xsig) | |
40 | pushl %ebp | |
41 | movl %esp,%ebp | |
42 | subl $32,%esp | |
43 | pushl %esi | |
44 | pushl %edi | |
45 | pushl %ebx | |
46 | ||
47 | movl PARAM2,%esi /* x */ | |
48 | movl PARAM3,%edi /* terms */ | |
49 | ||
50 | movl TERM_SIZE,%eax | |
51 | mull PARAM4 /* n */ | |
52 | addl %eax,%edi | |
53 | ||
54 | movl 4(%edi),%edx /* terms[n] */ | |
55 | movl %edx,SUM_MS | |
56 | movl (%edi),%edx /* terms[n] */ | |
57 | movl %edx,SUM_MIDDLE | |
58 | xor %eax,%eax | |
59 | movl %eax,SUM_LS | |
60 | movb %al,OVERFLOWED | |
61 | ||
62 | subl TERM_SIZE,%edi | |
63 | decl PARAM4 | |
64 | js L_accum_done | |
65 | ||
66 | L_accum_loop: | |
67 | xor %eax,%eax | |
68 | movl %eax,ACCUM_MS | |
69 | movl %eax,ACCUM_MIDDLE | |
70 | ||
71 | movl SUM_MIDDLE,%eax | |
72 | mull (%esi) /* x ls long */ | |
73 | movl %edx,ACCUM_LS | |
74 | ||
75 | movl SUM_MIDDLE,%eax | |
76 | mull 4(%esi) /* x ms long */ | |
77 | addl %eax,ACCUM_LS | |
78 | adcl %edx,ACCUM_MIDDLE | |
79 | adcl $0,ACCUM_MS | |
80 | ||
81 | movl SUM_MS,%eax | |
82 | mull (%esi) /* x ls long */ | |
83 | addl %eax,ACCUM_LS | |
84 | adcl %edx,ACCUM_MIDDLE | |
85 | adcl $0,ACCUM_MS | |
86 | ||
87 | movl SUM_MS,%eax | |
88 | mull 4(%esi) /* x ms long */ | |
89 | addl %eax,ACCUM_MIDDLE | |
90 | adcl %edx,ACCUM_MS | |
91 | ||
92 | testb $0xff,OVERFLOWED | |
93 | jz L_no_overflow | |
94 | ||
95 | movl (%esi),%eax | |
96 | addl %eax,ACCUM_MIDDLE | |
97 | movl 4(%esi),%eax | |
98 | adcl %eax,ACCUM_MS /* This could overflow too */ | |
99 | ||
100 | L_no_overflow: | |
101 | ||
102 | /* | |
103 | * Now put the sum of next term and the accumulator | |
104 | * into the sum register | |
105 | */ | |
106 | movl ACCUM_LS,%eax | |
107 | addl (%edi),%eax /* term ls long */ | |
108 | movl %eax,SUM_LS | |
109 | movl ACCUM_MIDDLE,%eax | |
110 | adcl (%edi),%eax /* term ls long */ | |
111 | movl %eax,SUM_MIDDLE | |
112 | movl ACCUM_MS,%eax | |
113 | adcl 4(%edi),%eax /* term ms long */ | |
114 | movl %eax,SUM_MS | |
115 | sbbb %al,%al | |
116 | movb %al,OVERFLOWED /* Used in the next iteration */ | |
117 | ||
118 | subl TERM_SIZE,%edi | |
119 | decl PARAM4 | |
120 | jns L_accum_loop | |
121 | ||
122 | L_accum_done: | |
123 | movl PARAM1,%edi /* accum */ | |
124 | movl SUM_LS,%eax | |
125 | addl %eax,(%edi) | |
126 | movl SUM_MIDDLE,%eax | |
127 | adcl %eax,4(%edi) | |
128 | movl SUM_MS,%eax | |
129 | adcl %eax,8(%edi) | |
130 | ||
131 | popl %ebx | |
132 | popl %edi | |
133 | popl %esi | |
134 | leave | |
135 | ret |