Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Basic two-word fraction declaration and manipulation. | |
3 | */ | |
4 | ||
5 | #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1 | |
6 | #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) | |
7 | #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) | |
8 | #define _FP_FRAC_HIGH_2(X) (X##_f1) | |
9 | #define _FP_FRAC_LOW_2(X) (X##_f0) | |
10 | #define _FP_FRAC_WORD_2(X,w) (X##_f##w) | |
11 | ||
12 | #define _FP_FRAC_SLL_2(X,N) \ | |
13 | do { \ | |
14 | if ((N) < _FP_W_TYPE_SIZE) \ | |
15 | { \ | |
16 | if (__builtin_constant_p(N) && (N) == 1) \ | |
17 | { \ | |
18 | X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ | |
19 | X##_f0 += X##_f0; \ | |
20 | } \ | |
21 | else \ | |
22 | { \ | |
23 | X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ | |
24 | X##_f0 <<= (N); \ | |
25 | } \ | |
26 | } \ | |
27 | else \ | |
28 | { \ | |
29 | X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ | |
30 | X##_f0 = 0; \ | |
31 | } \ | |
32 | } while (0) | |
33 | ||
34 | #define _FP_FRAC_SRL_2(X,N) \ | |
35 | do { \ | |
36 | if ((N) < _FP_W_TYPE_SIZE) \ | |
37 | { \ | |
38 | X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ | |
39 | X##_f1 >>= (N); \ | |
40 | } \ | |
41 | else \ | |
42 | { \ | |
43 | X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ | |
44 | X##_f1 = 0; \ | |
45 | } \ | |
46 | } while (0) | |
47 | ||
48 | /* Right shift with sticky-lsb. */ | |
49 | #define _FP_FRAC_SRS_2(X,N,sz) \ | |
50 | do { \ | |
51 | if ((N) < _FP_W_TYPE_SIZE) \ | |
52 | { \ | |
53 | X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ | |
54 | (__builtin_constant_p(N) && (N) == 1 \ | |
55 | ? X##_f0 & 1 \ | |
56 | : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ | |
57 | X##_f1 >>= (N); \ | |
58 | } \ | |
59 | else \ | |
60 | { \ | |
61 | X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ | |
c8968621 LY |
62 | (((X##_f1 << (2 * _FP_W_TYPE_SIZE - (N))) | \ |
63 | X##_f0) != 0)); \ | |
1da177e4 LT |
64 | X##_f1 = 0; \ |
65 | } \ | |
66 | } while (0) | |
67 | ||
68 | #define _FP_FRAC_ADDI_2(X,I) \ | |
69 | __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) | |
70 | ||
71 | #define _FP_FRAC_ADD_2(R,X,Y) \ | |
72 | __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) | |
73 | ||
74 | #define _FP_FRAC_SUB_2(R,X,Y) \ | |
75 | __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) | |
76 | ||
77 | #define _FP_FRAC_CLZ_2(R,X) \ | |
78 | do { \ | |
79 | if (X##_f1) \ | |
80 | __FP_CLZ(R,X##_f1); \ | |
81 | else \ | |
82 | { \ | |
83 | __FP_CLZ(R,X##_f0); \ | |
84 | R += _FP_W_TYPE_SIZE; \ | |
85 | } \ | |
86 | } while(0) | |
87 | ||
88 | /* Predicates */ | |
89 | #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) | |
90 | #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) | |
91 | #define _FP_FRAC_OVERP_2(fs,X) (X##_f1 & _FP_OVERFLOW_##fs) | |
92 | #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) | |
93 | #define _FP_FRAC_GT_2(X, Y) \ | |
94 | ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0)) | |
95 | #define _FP_FRAC_GE_2(X, Y) \ | |
96 | ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)) | |
97 | ||
98 | #define _FP_ZEROFRAC_2 0, 0 | |
99 | #define _FP_MINFRAC_2 0, 1 | |
100 | ||
101 | /* | |
102 | * Internals | |
103 | */ | |
104 | ||
105 | #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) | |
106 | ||
107 | #define __FP_CLZ_2(R, xh, xl) \ | |
108 | do { \ | |
109 | if (xh) \ | |
110 | __FP_CLZ(R,xl); \ | |
111 | else \ | |
112 | { \ | |
113 | __FP_CLZ(R,xl); \ | |
114 | R += _FP_W_TYPE_SIZE; \ | |
115 | } \ | |
116 | } while(0) | |
117 | ||
118 | #if 0 | |
119 | ||
120 | #ifndef __FP_FRAC_ADDI_2 | |
121 | #define __FP_FRAC_ADDI_2(xh, xl, i) \ | |
122 | (xh += ((xl += i) < i)) | |
123 | #endif | |
124 | #ifndef __FP_FRAC_ADD_2 | |
125 | #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \ | |
126 | (rh = xh + yh + ((rl = xl + yl) < xl)) | |
127 | #endif | |
128 | #ifndef __FP_FRAC_SUB_2 | |
129 | #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \ | |
130 | (rh = xh - yh - ((rl = xl - yl) > xl)) | |
131 | #endif | |
132 | ||
133 | #else | |
134 | ||
135 | #undef __FP_FRAC_ADDI_2 | |
136 | #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) | |
137 | #undef __FP_FRAC_ADD_2 | |
138 | #define __FP_FRAC_ADD_2 add_ssaaaa | |
139 | #undef __FP_FRAC_SUB_2 | |
140 | #define __FP_FRAC_SUB_2 sub_ddmmss | |
141 | ||
142 | #endif | |
143 | ||
144 | /* | |
145 | * Unpack the raw bits of a native fp value. Do not classify or | |
146 | * normalize the data. | |
147 | */ | |
148 | ||
149 | #define _FP_UNPACK_RAW_2(fs, X, val) \ | |
150 | do { \ | |
151 | union _FP_UNION_##fs _flo; _flo.flt = (val); \ | |
152 | \ | |
153 | X##_f0 = _flo.bits.frac0; \ | |
154 | X##_f1 = _flo.bits.frac1; \ | |
155 | X##_e = _flo.bits.exp; \ | |
156 | X##_s = _flo.bits.sign; \ | |
157 | } while (0) | |
158 | ||
159 | ||
160 | /* | |
161 | * Repack the raw bits of a native fp value. | |
162 | */ | |
163 | ||
164 | #define _FP_PACK_RAW_2(fs, val, X) \ | |
165 | do { \ | |
166 | union _FP_UNION_##fs _flo; \ | |
167 | \ | |
168 | _flo.bits.frac0 = X##_f0; \ | |
169 | _flo.bits.frac1 = X##_f1; \ | |
170 | _flo.bits.exp = X##_e; \ | |
171 | _flo.bits.sign = X##_s; \ | |
172 | \ | |
173 | (val) = _flo.flt; \ | |
174 | } while (0) | |
175 | ||
176 | ||
177 | /* | |
178 | * Multiplication algorithms: | |
179 | */ | |
180 | ||
181 | /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ | |
182 | ||
183 | #define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit) \ | |
184 | do { \ | |
185 | _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ | |
186 | \ | |
187 | doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ | |
188 | doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ | |
189 | doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ | |
190 | doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ | |
191 | \ | |
192 | __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ | |
193 | _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ | |
194 | 0, _b_f1, _b_f0, 0, \ | |
195 | _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ | |
196 | _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \ | |
197 | __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ | |
198 | _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ | |
199 | 0, _c_f1, _c_f0, 0, \ | |
200 | _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ | |
201 | _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \ | |
202 | \ | |
203 | /* Normalize since we know where the msb of the multiplicands \ | |
204 | were (bit B), we know that the msb of the of the product is \ | |
205 | at either 2B or 2B-1. */ \ | |
206 | _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ | |
207 | R##_f0 = _FP_FRAC_WORD_4(_z,0); \ | |
208 | R##_f1 = _FP_FRAC_WORD_4(_z,1); \ | |
209 | } while (0) | |
210 | ||
211 | /* This next macro appears to be totally broken. Fortunately nowhere | |
212 | * seems to use it :-> The problem is that we define _z[4] but | |
213 | * then use it in _FP_FRAC_SRS_4, which will attempt to access | |
214 | * _z_f[n] which will cause an error. The fix probably involves | |
215 | * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998 | |
216 | */ | |
217 | #define _FP_MUL_MEAT_2_gmp(fs, R, X, Y) \ | |
218 | do { \ | |
219 | _FP_W_TYPE _x[2], _y[2], _z[4]; \ | |
220 | _x[0] = X##_f0; _x[1] = X##_f1; \ | |
221 | _y[0] = Y##_f0; _y[1] = Y##_f1; \ | |
222 | \ | |
223 | mpn_mul_n(_z, _x, _y, 2); \ | |
224 | \ | |
225 | /* Normalize since we know where the msb of the multiplicands \ | |
226 | were (bit B), we know that the msb of the of the product is \ | |
227 | at either 2B or 2B-1. */ \ | |
228 | _FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs); \ | |
229 | R##_f0 = _z[0]; \ | |
230 | R##_f1 = _z[1]; \ | |
231 | } while (0) | |
232 | ||
233 | ||
234 | /* | |
235 | * Division algorithms: | |
236 | * This seems to be giving me difficulties -- PMM | |
237 | * Look, NetBSD seems to be able to comment algorithms. Can't you? | |
238 | * I've thrown printks at the problem. | |
239 | * This now appears to work, but I still don't really know why. | |
240 | * Also, I don't think the result is properly normalised... | |
241 | */ | |
242 | ||
243 | #define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y) \ | |
244 | do { \ | |
245 | extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2], \ | |
246 | _FP_W_TYPE n1, _FP_W_TYPE n0, \ | |
247 | _FP_W_TYPE d1, _FP_W_TYPE d0); \ | |
248 | _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0; \ | |
249 | _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0; \ | |
250 | _FP_W_TYPE _rmem[2], _qmem[2]; \ | |
251 | /* I think this check is to ensure that the result is normalised. \ | |
252 | * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in \ | |
253 | * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y. \ | |
254 | * In this case we tweak things. (this is based on comments in \ | |
255 | * the NetBSD FPU emulation code. ) \ | |
256 | * We know X,Y are normalised because we ensure this as part of \ | |
257 | * the unpacking process. -- PMM \ | |
258 | */ \ | |
259 | if (_FP_FRAC_GT_2(X, Y)) \ | |
260 | { \ | |
261 | /* R##_e++; */ \ | |
262 | _n_f3 = X##_f1 >> 1; \ | |
263 | _n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ | |
264 | _n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ | |
265 | _n_f0 = 0; \ | |
266 | } \ | |
267 | else \ | |
268 | { \ | |
269 | R##_e--; \ | |
270 | _n_f3 = X##_f1; \ | |
271 | _n_f2 = X##_f0; \ | |
272 | _n_f1 = _n_f0 = 0; \ | |
273 | } \ | |
274 | \ | |
275 | /* Normalize, i.e. make the most significant bit of the \ | |
276 | denominator set. CHANGED: - 1 to nothing -- PMM */ \ | |
277 | _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */); \ | |
278 | \ | |
279 | /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 \ | |
280 | primitive snagged from libgcc2.c. */ \ | |
281 | \ | |
282 | _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1); \ | |
283 | _q_f1 = _qmem[0]; \ | |
284 | umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0); \ | |
285 | _r_f1 = _rmem[0]; \ | |
286 | _r_f0 = _n_f1; \ | |
287 | if (_FP_FRAC_GT_2(_m, _r)) \ | |
288 | { \ | |
289 | _q_f1--; \ | |
290 | _FP_FRAC_ADD_2(_r, _r, Y); \ | |
291 | if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ | |
292 | { \ | |
293 | _q_f1--; \ | |
294 | _FP_FRAC_ADD_2(_r, _r, Y); \ | |
295 | } \ | |
296 | } \ | |
297 | _FP_FRAC_SUB_2(_r, _r, _m); \ | |
298 | \ | |
299 | _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1); \ | |
300 | _q_f0 = _qmem[0]; \ | |
301 | umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0); \ | |
302 | _r_f1 = _rmem[0]; \ | |
303 | _r_f0 = _n_f0; \ | |
304 | if (_FP_FRAC_GT_2(_m, _r)) \ | |
305 | { \ | |
306 | _q_f0--; \ | |
307 | _FP_FRAC_ADD_2(_r, _r, Y); \ | |
308 | if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ | |
309 | { \ | |
310 | _q_f0--; \ | |
311 | _FP_FRAC_ADD_2(_r, _r, Y); \ | |
312 | } \ | |
313 | } \ | |
314 | _FP_FRAC_SUB_2(_r, _r, _m); \ | |
315 | \ | |
316 | R##_f1 = _q_f1; \ | |
317 | R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0); \ | |
318 | /* adjust so answer is normalized again. I'm not sure what the \ | |
319 | * final sz param should be. In practice it's never used since \ | |
320 | * N is 1 which is always going to be < _FP_W_TYPE_SIZE... \ | |
321 | */ \ | |
322 | /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs); */ \ | |
323 | } while (0) | |
324 | ||
325 | ||
326 | #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ | |
327 | do { \ | |
328 | _FP_W_TYPE _x[4], _y[2], _z[4]; \ | |
329 | _y[0] = Y##_f0; _y[1] = Y##_f1; \ | |
330 | _x[0] = _x[3] = 0; \ | |
331 | if (_FP_FRAC_GT_2(X, Y)) \ | |
332 | { \ | |
333 | R##_e++; \ | |
334 | _x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) | \ | |
335 | X##_f1 >> (_FP_W_TYPE_SIZE - \ | |
336 | (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE))); \ | |
337 | _x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE); \ | |
338 | } \ | |
339 | else \ | |
340 | { \ | |
341 | _x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) | \ | |
342 | X##_f1 >> (_FP_W_TYPE_SIZE - \ | |
343 | (_FP_WFRACBITS - _FP_W_TYPE_SIZE))); \ | |
344 | _x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE); \ | |
345 | } \ | |
346 | \ | |
347 | (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ | |
348 | R##_f1 = _z[1]; \ | |
349 | R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ | |
350 | } while (0) | |
351 | ||
352 | ||
353 | /* | |
354 | * Square root algorithms: | |
355 | * We have just one right now, maybe Newton approximation | |
356 | * should be added for those machines where division is fast. | |
357 | */ | |
358 | ||
359 | #define _FP_SQRT_MEAT_2(R, S, T, X, q) \ | |
360 | do { \ | |
361 | while (q) \ | |
362 | { \ | |
363 | T##_f1 = S##_f1 + q; \ | |
364 | if (T##_f1 <= X##_f1) \ | |
365 | { \ | |
366 | S##_f1 = T##_f1 + q; \ | |
367 | X##_f1 -= T##_f1; \ | |
368 | R##_f1 += q; \ | |
369 | } \ | |
370 | _FP_FRAC_SLL_2(X, 1); \ | |
371 | q >>= 1; \ | |
372 | } \ | |
373 | q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ | |
374 | while (q) \ | |
375 | { \ | |
376 | T##_f0 = S##_f0 + q; \ | |
377 | T##_f1 = S##_f1; \ | |
378 | if (T##_f1 < X##_f1 || \ | |
379 | (T##_f1 == X##_f1 && T##_f0 < X##_f0)) \ | |
380 | { \ | |
381 | S##_f0 = T##_f0 + q; \ | |
382 | if (((_FP_WS_TYPE)T##_f0) < 0 && \ | |
383 | ((_FP_WS_TYPE)S##_f0) >= 0) \ | |
384 | S##_f1++; \ | |
385 | _FP_FRAC_SUB_2(X, X, T); \ | |
386 | R##_f0 += q; \ | |
387 | } \ | |
388 | _FP_FRAC_SLL_2(X, 1); \ | |
389 | q >>= 1; \ | |
390 | } \ | |
391 | } while (0) | |
392 | ||
393 | ||
394 | /* | |
395 | * Assembly/disassembly for converting to/from integral types. | |
396 | * No shifting or overflow handled here. | |
397 | */ | |
398 | ||
399 | #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ | |
400 | do { \ | |
401 | if (rsize <= _FP_W_TYPE_SIZE) \ | |
402 | r = X##_f0; \ | |
403 | else \ | |
404 | { \ | |
405 | r = X##_f1; \ | |
406 | r <<= _FP_W_TYPE_SIZE; \ | |
407 | r += X##_f0; \ | |
408 | } \ | |
409 | } while (0) | |
410 | ||
411 | #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ | |
412 | do { \ | |
413 | X##_f0 = r; \ | |
414 | X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ | |
415 | } while (0) | |
416 | ||
417 | /* | |
418 | * Convert FP values between word sizes | |
419 | */ | |
420 | ||
421 | #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \ | |
422 | do { \ | |
423 | _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ | |
424 | _FP_WFRACBITS_##sfs); \ | |
425 | D##_f = S##_f0; \ | |
426 | } while (0) | |
427 | ||
428 | #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \ | |
429 | do { \ | |
430 | D##_f0 = S##_f; \ | |
431 | D##_f1 = 0; \ | |
432 | _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ | |
433 | } while (0) | |
434 |