Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Basic four-word fraction declaration and manipulation. | |
3 | * | |
4 | * When adding quadword support for 32 bit machines, we need | |
5 | * to be a little careful as double multiply uses some of these | |
6 | * macros: (in op-2.h) | |
7 | * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4, | |
8 | * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4 | |
9 | * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use | |
10 | * _FP_FRAC_DECL_4: it appears to be broken and is not used | |
11 | * anywhere anyway. ) | |
12 | * | |
13 | * I've now fixed all the macros that were here from the sparc64 code. | |
14 | * [*none* of the shift macros were correct!] -- PMM 02/1998 | |
15 | * | |
16 | * The only quadword stuff that remains to be coded is: | |
17 | * 1) the conversion to/from ints, which requires | |
18 | * that we check (in op-common.h) that the following do the right thing | |
19 | * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt) | |
20 | * 2) multiply, divide and sqrt, which require: | |
21 | * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q), | |
22 | * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to | |
23 | * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h. | |
24 | * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for | |
25 | * these; they are used nowhere else. ] | |
26 | */ | |
27 | ||
/* Declare the storage for a four-word fraction named X: an array
 * X_f[4] of _FP_W_TYPE.  X_f[0] is the least significant word and
 * X_f[3] the most significant.
 */
#define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]

/* Copy all four fraction words of S into D, lowest word first.
 * Expands to one comma expression.
 */
#define _FP_FRAC_COPY_4(D,S)                    \
  (D##_f[0] = S##_f[0],                         \
   D##_f[1] = S##_f[1],                         \
   D##_f[2] = S##_f[2],                         \
   D##_f[3] = S##_f[3])
/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another
 * macro such as _FP_ZEROFRAC_n which expands to n comma-separated values.
 * The result is that we get an expansion of __FP_FRAC_SET_n(X,I3,I2,I1,I0)
 * which just assigns the In values to the array X##_f[], most
 * significant word first.
 * This is why the number of parameters doesn't appear to match
 * at first glance... -- PMM
 */
/* Assign the comma-separated word list I to X.  I is expanded first,
 * so __FP_FRAC_SET_4 receives its four values as separate arguments.
 */
#define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)

/* Lvalue accessors: most significant word, least significant word,
 * and word number w of X.
 */
#define _FP_FRAC_HIGH_4(X)      (X##_f[3])
#define _FP_FRAC_LOW_4(X)       (X##_f[0])
#define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
43 | ||
/* Shift the four-word fraction X left by N bits, filling with zeros.
 *
 * N is split into a whole-word part (_skip) and a sub-word part (_up).
 * When _up is zero the words are only moved: combining neighbours
 * would need a shift by the full word width (_down == _FP_W_TYPE_SIZE),
 * which is undefined in C and on some machines behaves as a shift by
 * zero.  N is expected to be less than 4 * _FP_W_TYPE_SIZE.
 */
#define _FP_FRAC_SLL_4(X,N)                                             \
  do {                                                                  \
    _FP_I_TYPE _up, _down, _skip, _i;                                   \
    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
    _up = (N) % _FP_W_TYPE_SIZE;                                        \
    _down = _FP_W_TYPE_SIZE - _up;                                      \
    if (!_up)                                                           \
      {                                                                 \
        /* whole-word move only */                                      \
        for (_i = 3; _i >= _skip; --_i)                                 \
          X##_f[_i] = X##_f[_i-_skip];                                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (_i = 3; _i > _skip; --_i)                                  \
          X##_f[_i] = X##_f[_i-_skip] << _up                            \
                      | X##_f[_i-_skip-1] >> _down;                     \
        /* lowest surviving word has no lower neighbour */              \
        X##_f[_i--] = X##_f[0] << _up;                                  \
      }                                                                 \
    /* everything below the moved words becomes zero */                 \
    for (; _i >= 0; --_i)                                               \
      X##_f[_i] = 0;                                                    \
  } while (0)
57 | ||
/* Shift the four-word fraction X right by N bits (logical shift; the
 * bits that fall off the low end are discarded and zeros enter at the
 * top).
 *
 * N is split into a whole-word part (_skip) and a sub-word part
 * (_down).  When _down is zero the words are only moved: combining
 * neighbours would need a left shift by the full word width
 * (_up == _FP_W_TYPE_SIZE), which is undefined in C.  N is expected to
 * be less than 4 * _FP_W_TYPE_SIZE.
 */
#define _FP_FRAC_SRL_4(X,N)                                             \
  do {                                                                  \
    _FP_I_TYPE _up, _down, _skip, _i;                                   \
    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
    _down = (N) % _FP_W_TYPE_SIZE;                                      \
    _up = _FP_W_TYPE_SIZE - _down;                                      \
    if (!_down)                                                         \
      {                                                                 \
        /* whole-word move only */                                      \
        for (_i = 0; _i <= 3-_skip; ++_i)                               \
          X##_f[_i] = X##_f[_i+_skip];                                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (_i = 0; _i < 3-_skip; ++_i)                                \
          X##_f[_i] = X##_f[_i+_skip] >> _down                          \
                      | X##_f[_i+_skip+1] << _up;                       \
        /* highest surviving word has no upper neighbour */             \
        X##_f[_i++] = X##_f[3] >> _down;                                \
      }                                                                 \
    /* everything above the moved words becomes zero */                 \
    for (; _i < 4; ++_i)                                                \
      X##_f[_i] = 0;                                                    \
  } while (0)
71 | ||
72 | ||
/* Right shift with sticky-lsb.
 * What this actually means is that we do a standard right-shift of X
 * by N bits, but that if any of the bits that fall off the right hand
 * side were one then we always set the LSbit.
 *
 * The `size' argument is accepted but not used by this implementation.
 *
 * When N is a whole number of words (_down == 0) no sub-word bits are
 * lost and the words are only moved; shifting by _up == _FP_W_TYPE_SIZE
 * in that case would be undefined in C and could both corrupt the
 * result and set the sticky bit spuriously.  N is expected to be less
 * than 4 * _FP_W_TYPE_SIZE.
 */
#define _FP_FRAC_SRS_4(X,N,size)                                        \
  do {                                                                  \
    _FP_I_TYPE _up, _down, _skip, _i;                                   \
    _FP_W_TYPE _s;                                                      \
    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
    _down = (N) % _FP_W_TYPE_SIZE;                                      \
    _up = _FP_W_TYPE_SIZE - _down;                                      \
    /* collect the words that are shifted out entirely */               \
    for (_s = _i = 0; _i < _skip; ++_i)                                 \
      _s |= X##_f[_i];                                                  \
    if (!_down)                                                         \
      {                                                                 \
        for (_i = 0; _i <= 3-_skip; ++_i)                               \
          X##_f[_i] = X##_f[_i+_skip];                                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* plus the low _down bits of the first surviving word */       \
        _s |= X##_f[_i] << _up;                                         \
        for (_i = 0; _i < 3-_skip; ++_i)                                \
          X##_f[_i] = X##_f[_i+_skip] >> _down                          \
                      | X##_f[_i+_skip+1] << _up;                       \
        X##_f[_i++] = X##_f[3] >> _down;                                \
      }                                                                 \
    for (; _i < 4; ++_i)                                                \
      X##_f[_i] = 0;                                                    \
    /* s is now != 0 if we want to set the LSbit; don't fix the LSB */  \
    /* until the very end when we're sure f[0] is stable */             \
    X##_f[0] |= (_s != 0);                                              \
  } while (0)
97 | ||
/* R = X + Y on four-word fractions.  The words are handed to
 * __FP_FRAC_ADD_4 most significant first.
 */
#define _FP_FRAC_ADD_4(R,X,Y)                                   \
  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],       \
                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],       \
                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* R = X - Y on four-word fractions, same word ordering as above.  */
#define _FP_FRAC_SUB_4(R,X,Y)                                   \
  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],       \
                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],       \
                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X += I, where I is a single-word value.  */
#define _FP_FRAC_ADDI_4(X,I)                                    \
  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
110 | ||
/* Word lists for the all-zero fraction and the smallest non-zero
 * fraction, in the order taken by _FP_FRAC_SET_4.
 */
#define _FP_ZEROFRAC_4          0,0,0,0
#define _FP_MINFRAC_4           0,0,0,1

/* True when every word of X is zero.  */
#define _FP_FRAC_ZEROP_4(X)     (!(X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]))
/* True when the top word of X, viewed as signed, is negative.  */
#define _FP_FRAC_NEGP_4(X)      (0 > (_FP_WS_TYPE)X##_f[3])
/* Non-zero when the overflow bit of format fs is set in X.  The
 * overflow bit lies above the fraction bits, so it must be tested in
 * the most significant word X_f[3], not in the lowest word.
 */
#define _FP_FRAC_OVERP_4(fs,X)  (X##_f[3] & _FP_OVERFLOW_##fs)
117 | ||
/* True when X and Y agree in all four words.  */
#define _FP_FRAC_EQ_4(X,Y)                              \
  (!(X##_f[0] != Y##_f[0] || X##_f[1] != Y##_f[1]       \
     || X##_f[2] != Y##_f[2] || X##_f[3] != Y##_f[3]))
121 | ||
/* True when X > Y.  Words are compared from the most significant
 * down; the first pair that differs decides the result.
 */
#define _FP_FRAC_GT_4(X,Y)                              \
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3]           \
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2]         \
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1]         \
   : X##_f[0] > Y##_f[0])
130 | ||
/* True when X >= Y.  Words are compared from the most significant
 * down; the first pair that differs decides, and equal operands
 * compare true.
 */
#define _FP_FRAC_GE_4(X,Y)                              \
  (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3]           \
   : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2]         \
   : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1]         \
   : X##_f[0] >= Y##_f[0])
139 | ||
140 | ||
/* Set R to the number of leading zero bits in the four-word fraction
 * X, counting from the top of X_f[3].
 *
 * The highest non-zero word is located, __FP_CLZ is applied to that
 * word, and one word width is added for every all-zero word above it.
 * If the upper three words are zero, __FP_CLZ is applied to X_f[0]
 * whatever its value.
 */
#define _FP_FRAC_CLZ_4(R,X)                     \
  do {                                          \
    if (X##_f[3])                               \
      {                                         \
        __FP_CLZ(R,X##_f[3]);                   \
      }                                         \
    else if (X##_f[2])                          \
      {                                         \
        __FP_CLZ(R,X##_f[2]);                   \
        R += _FP_W_TYPE_SIZE;                   \
      }                                         \
    else if (X##_f[1])                          \
      {                                         \
        /* count in word 1; word 2 is zero here */ \
        __FP_CLZ(R,X##_f[1]);                   \
        R += _FP_W_TYPE_SIZE*2;                 \
      }                                         \
    else                                        \
      {                                         \
        __FP_CLZ(R,X##_f[0]);                   \
        R += _FP_W_TYPE_SIZE*3;                 \
      }                                         \
  } while(0)
163 | ||
164 | ||
165 | #define _FP_UNPACK_RAW_4(fs, X, val) \ | |
166 | do { \ | |
167 | union _FP_UNION_##fs _flo; _flo.flt = (val); \ | |
168 | X##_f[0] = _flo.bits.frac0; \ | |
169 | X##_f[1] = _flo.bits.frac1; \ | |
170 | X##_f[2] = _flo.bits.frac2; \ | |
171 | X##_f[3] = _flo.bits.frac3; \ | |
172 | X##_e = _flo.bits.exp; \ | |
173 | X##_s = _flo.bits.sign; \ | |
174 | } while (0) | |
175 | ||
176 | #define _FP_PACK_RAW_4(fs, val, X) \ | |
177 | do { \ | |
178 | union _FP_UNION_##fs _flo; \ | |
179 | _flo.bits.frac0 = X##_f[0]; \ | |
180 | _flo.bits.frac1 = X##_f[1]; \ | |
181 | _flo.bits.frac2 = X##_f[2]; \ | |
182 | _flo.bits.frac3 = X##_f[3]; \ | |
183 | _flo.bits.exp = X##_e; \ | |
184 | _flo.bits.sign = X##_s; \ | |
185 | (val) = _flo.flt; \ | |
186 | } while (0) | |
187 | ||
188 | ||
189 | /* | |
190 | * Internals | |
191 | */ | |
192 | ||
/* Assign four words to X, given most significant first: I3 goes to
 * X_f[3] and I0 to X_f[0].  Expands to one comma expression.
 */
#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)          \
  (X##_f[3] = I3,                               \
   X##_f[2] = I2,                               \
   X##_f[1] = I1,                               \
   X##_f[0] = I0)
195 | ||
#ifndef __FP_FRAC_ADD_4
/* (r3,r2,r1,r0) = (x3,x2,x1,x0) + (y3,y2,y1,y0), word 0 being the
 * least significant.  A carry out of the top word is discarded.
 *
 * The three low sums are built in temporaries and stored only after
 * every x and y word has been read, so the result operands may alias
 * either input (for example when adding in place).  Each carry is
 * detected from the wrapped sum being smaller than an addend.
 */
#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)   \
  do {                                                          \
    _FP_W_TYPE _fa4_w0, _fa4_w1, _fa4_w2;                       \
    int _c1, _c2, _c3;                                          \
    _fa4_w0 = (x0) + (y0);                                      \
    _c1 = _fa4_w0 < (x0);                                       \
    _fa4_w1 = (x1) + (y1);                                      \
    _c2 = _fa4_w1 < (x1);                                       \
    _fa4_w1 += _c1;                                             \
    /* adding the carry-in may itself wrap */                   \
    _c2 |= _fa4_w1 < (_FP_W_TYPE) _c1;                          \
    _fa4_w2 = (x2) + (y2);                                      \
    _c3 = _fa4_w2 < (x2);                                       \
    _fa4_w2 += _c2;                                             \
    _c3 |= _fa4_w2 < (_FP_W_TYPE) _c2;                          \
    r3 = (x3) + (y3) + _c3;                                     \
    r2 = _fa4_w2;                                               \
    r1 = _fa4_w1;                                               \
    r0 = _fa4_w0;                                               \
  } while (0)
#endif
213 | ||
#ifndef __FP_FRAC_SUB_4
/* (r3,r2,r1,r0) = (x3,x2,x1,x0) - (y3,y2,y1,y0), word 0 being the
 * least significant.  A borrow out of the top word is discarded.
 *
 * A word difference borrows when it wraps above the minuend.  Taking
 * the incoming borrow away can borrow again, but only when the
 * difference is zero, so that is tested BEFORE the decrement
 * (difference < borrow-in).  Testing "difference > borrow-in" after
 * the decrement would report a borrow for every non-zero difference
 * that has no borrow-in.
 *
 * The three low differences are built in temporaries and stored only
 * after every x and y word has been read, so the result operands may
 * alias either input.
 */
#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)   \
  do {                                                          \
    _FP_W_TYPE _fs4_w0, _fs4_w1, _fs4_w2;                       \
    int _c1, _c2, _c3;                                          \
    _fs4_w0 = (x0) - (y0);                                      \
    _c1 = _fs4_w0 > (x0);                                       \
    _fs4_w1 = (x1) - (y1);                                      \
    _c2 = _fs4_w1 > (x1);                                       \
    _c2 |= _fs4_w1 < (_FP_W_TYPE) _c1;                          \
    _fs4_w1 -= _c1;                                             \
    _fs4_w2 = (x2) - (y2);                                      \
    _c3 = _fs4_w2 > (x2);                                       \
    _c3 |= _fs4_w2 < (_FP_W_TYPE) _c2;                          \
    _fs4_w2 -= _c2;                                             \
    r3 = (x3) - (y3) - _c3;                                     \
    r2 = _fs4_w2;                                               \
    r1 = _fs4_w1;                                               \
    r0 = _fs4_w0;                                               \
  } while (0)
#endif
231 | ||
#ifndef __FP_FRAC_ADDI_4
/* Add the single-word value i to (x3,x2,x1,x0), word 0 being the
 * least significant, propagating the carry upward.  This remains one
 * expression whose value is the updated x3.
 *
 * After the low word is updated, a carry out of it shows up as the
 * new x0 being smaller than i.  That carry ripples into a higher word
 * only if every word in between wrapped round to zero.  The previous
 * form compared (x0 += i) with x0 itself, which reads and modifies x0
 * without sequencing and cannot detect the carry reliably.
 *
 * Note: i is evaluated more than once, so it must have no side
 * effects and must not depend on x0..x3.
 */
#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                         \
  ((x0) += (i),                                                 \
   (x1) += ((x0) < (i)),                                        \
   (x2) += ((x1) == 0 && (x0) < (i)),                           \
   (x3) += ((x2) == 0 && (x1) == 0 && (x0) < (i)))
#endif
237 | ||
/* Convert FP values between word sizes.  This appears to be more
 * complicated than I'd have expected it to be, so these might be
 * wrong...  These macros are in any case somewhat bogus because they
 * use information about what various FRAC_n variables look like
 * internally [e.g., that 2-word vars are X_f0 and X_f1].  But so do
 * the ones in op-2.h and op-1.h.
 */
/* Narrow the four-word fraction S (format sfs) to the one-word
 * fraction D (format dfs).  S is first shifted right, with sticky
 * LSB, by the difference between the two formats' _FP_WFRACBITS_
 * values; its low word is then copied into D_f.  S is modified in
 * the process.
 */
#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)                               \
  do {                                                                  \
    _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),      \
                   _FP_WFRACBITS_##sfs);                                \
    D##_f = _FP_FRAC_LOW_4(S);                                          \
  } while (0)
251 | ||
/* Narrow the four-word fraction S (format sfs) to the two-word
 * fraction D (format dfs).  S is first shifted right, with sticky
 * LSB, by the difference between the two formats' _FP_WFRACBITS_
 * values; its two low words are then copied into D_f0 and D_f1.
 * S is modified in the process.
 */
#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)                               \
  do {                                                                  \
    _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),      \
                   _FP_WFRACBITS_##sfs);                                \
    D##_f0 = _FP_FRAC_WORD_4(S,0);                                      \
    D##_f1 = _FP_FRAC_WORD_4(S,1);                                      \
  } while (0)
259 | ||
260 | /* Assembly/disassembly for converting to/from integral types. | |
261 | * No shifting or overflow handled here. | |
262 | */ | |
/* Put the FP value X into r, which is an integer of size rsize (in
 * bits).  Only as many words of X as the rsize test selects are
 * used: one word, two words, or all four.  Three-word integers
 * (implausible) share the four-word path.
 */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
  do {                                                                  \
    if (rsize > 2*_FP_W_TYPE_SIZE)                                      \
      {                                                                 \
        /* wider than two words: fold in all four, top word first */    \
        r = X##_f[3];                                                   \
        r <<= _FP_W_TYPE_SIZE;                                          \
        r += X##_f[2];                                                  \
        r <<= _FP_W_TYPE_SIZE;                                          \
        r += X##_f[1];                                                  \
        r <<= _FP_W_TYPE_SIZE;                                          \
        r += X##_f[0];                                                  \
      }                                                                 \
    else if (rsize > _FP_W_TYPE_SIZE)                                   \
      {                                                                 \
        r = X##_f[1];                                                   \
        r <<= _FP_W_TYPE_SIZE;                                          \
        r += X##_f[0];                                                  \
      }                                                                 \
    else                                                                \
      r = X##_f[0];                                                     \
  } while (0)
287 | ||
/* "No disassemble Number Five!" */
/* Move an integer r of size rsize (in bits) into X's fractional part.
 * Word k of X receives r shifted right by k words when rsize says r is
 * wide enough to have such a word, and zero otherwise.  We rely on the
 * _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
 * having to mask the values we store into it.
 */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
  do {                                                                  \
    X##_f[0] = r;                                                       \
    if (rsize <= _FP_W_TYPE_SIZE)                                       \
      X##_f[1] = 0;                                                     \
    else                                                                \
      X##_f[1] = r >> _FP_W_TYPE_SIZE;                                  \
    if (rsize <= 2*_FP_W_TYPE_SIZE)                                     \
      X##_f[2] = 0;                                                     \
    else                                                                \
      X##_f[2] = r >> 2*_FP_W_TYPE_SIZE;                                \
    if (rsize <= 3*_FP_W_TYPE_SIZE)                                     \
      X##_f[3] = 0;                                                     \
    else                                                                \
      X##_f[3] = r >> 3*_FP_W_TYPE_SIZE;                                \
  } while (0)
300 | ||
/* Widen the one-word fraction S (format sfs) into the four-word
 * fraction D (format dfs): S_f becomes D's low word, the upper words
 * are cleared, and D is shifted left by the difference between the
 * two formats' _FP_WFRACBITS_ values.
 */
#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)                               \
  do {                                                                  \
    _FP_FRAC_LOW_4(D) = S##_f;                                          \
    _FP_FRAC_WORD_4(D,1) = _FP_FRAC_WORD_4(D,2)                         \
                         = _FP_FRAC_HIGH_4(D) = 0;                      \
    _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
  } while (0)
307 | ||
/* Widen the two-word fraction S (format sfs) into the four-word
 * fraction D (format dfs): S_f0 and S_f1 become D's two low words,
 * the upper two words are cleared, and D is shifted left by the
 * difference between the two formats' _FP_WFRACBITS_ values.
 */
#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)                               \
  do {                                                                  \
    _FP_FRAC_WORD_4(D,0) = S##_f0;                                      \
    _FP_FRAC_WORD_4(D,1) = S##_f1;                                      \
    _FP_FRAC_WORD_4(D,2) = _FP_FRAC_HIGH_4(D) = 0;                      \
    _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
  } while (0)
315 | ||
/* FIXME! This has to be written.  Until then the macro expands to
 * nothing, so using it performs no computation at all.
 */
#define _FP_SQRT_MEAT_4(R, S, T, X, q)