Commit | Line | Data |
---|---|---|
e24c3bec MC |
1 | /* |
2 | * IEEE754 floating point arithmetic | |
3 | * double precision: MADDF.f (Fused Multiply Add) | |
4 | * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) | |
5 | * | |
6 | * MIPS floating point support | |
7 | * Copyright (C) 2015 Imagination Technologies, Ltd. | |
8 | * Author: Markos Chandras <markos.chandras@imgtec.com> | |
9 | * | |
10 | * This program is free software; you can distribute it and/or modify it | |
11 | * under the terms of the GNU General Public License as published by the | |
12 | * Free Software Foundation; version 2 of the License. | |
13 | */ | |
14 | ||
15 | #include "ieee754dp.h" | |
16 | ||
d728f670 PB |
17 | enum maddf_flags { |
18 | maddf_negate_product = 1 << 0, | |
19 | }; | |
20 | ||
21 | static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, | |
22 | union ieee754dp y, enum maddf_flags flags) | |
e24c3bec MC |
23 | { |
24 | int re; | |
25 | int rs; | |
26 | u64 rm; | |
27 | unsigned lxm; | |
28 | unsigned hxm; | |
29 | unsigned lym; | |
30 | unsigned hym; | |
31 | u64 lrm; | |
32 | u64 hrm; | |
33 | u64 t; | |
34 | u64 at; | |
35 | int s; | |
36 | ||
37 | COMPXDP; | |
38 | COMPYDP; | |
e2d11e1a | 39 | COMPZDP; |
e24c3bec MC |
40 | |
41 | EXPLODEXDP; | |
42 | EXPLODEYDP; | |
e2d11e1a | 43 | EXPLODEZDP; |
e24c3bec MC |
44 | |
45 | FLUSHXDP; | |
46 | FLUSHYDP; | |
e2d11e1a | 47 | FLUSHZDP; |
e24c3bec MC |
48 | |
49 | ieee754_clearcx(); | |
50 | ||
51 | switch (zc) { | |
52 | case IEEE754_CLASS_SNAN: | |
53 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
54 | return ieee754dp_nanxcpt(z); | |
55 | case IEEE754_CLASS_DNORM: | |
e2d11e1a | 56 | DPDNORMZ; |
e24c3bec MC |
57 | /* QNAN is handled separately below */ |
58 | } | |
59 | ||
60 | switch (CLPAIR(xc, yc)) { | |
61 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): | |
62 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): | |
63 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): | |
64 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): | |
65 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): | |
66 | return ieee754dp_nanxcpt(y); | |
67 | ||
68 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): | |
69 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): | |
70 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): | |
71 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): | |
72 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): | |
73 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): | |
74 | return ieee754dp_nanxcpt(x); | |
75 | ||
76 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): | |
77 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): | |
78 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): | |
79 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): | |
80 | return y; | |
81 | ||
82 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): | |
83 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): | |
84 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): | |
85 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): | |
86 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): | |
87 | return x; | |
88 | ||
89 | ||
90 | /* | |
91 | * Infinity handling | |
92 | */ | |
93 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): | |
94 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): | |
95 | if (zc == IEEE754_CLASS_QNAN) | |
96 | return z; | |
97 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
98 | return ieee754dp_indef(); | |
99 | ||
100 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): | |
101 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): | |
102 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): | |
103 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): | |
104 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): | |
105 | if (zc == IEEE754_CLASS_QNAN) | |
106 | return z; | |
107 | return ieee754dp_inf(xs ^ ys); | |
108 | ||
109 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): | |
110 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): | |
111 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): | |
112 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): | |
113 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): | |
114 | if (zc == IEEE754_CLASS_INF) | |
115 | return ieee754dp_inf(zs); | |
116 | /* Multiplication is 0 so just return z */ | |
117 | return z; | |
118 | ||
119 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): | |
120 | DPDNORMX; | |
121 | ||
122 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): | |
123 | if (zc == IEEE754_CLASS_QNAN) | |
124 | return z; | |
125 | else if (zc == IEEE754_CLASS_INF) | |
126 | return ieee754dp_inf(zs); | |
127 | DPDNORMY; | |
128 | break; | |
129 | ||
130 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): | |
131 | if (zc == IEEE754_CLASS_QNAN) | |
132 | return z; | |
133 | else if (zc == IEEE754_CLASS_INF) | |
134 | return ieee754dp_inf(zs); | |
135 | DPDNORMX; | |
136 | break; | |
137 | ||
138 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): | |
139 | if (zc == IEEE754_CLASS_QNAN) | |
140 | return z; | |
141 | else if (zc == IEEE754_CLASS_INF) | |
142 | return ieee754dp_inf(zs); | |
143 | /* fall through to real computations */ | |
144 | } | |
145 | ||
146 | /* Finally get to do some computation */ | |
147 | ||
148 | /* | |
149 | * Do the multiplication bit first | |
150 | * | |
151 | * rm = xm * ym, re = xe + ye basically | |
152 | * | |
153 | * At this point xm and ym should have been normalized. | |
154 | */ | |
155 | assert(xm & DP_HIDDEN_BIT); | |
156 | assert(ym & DP_HIDDEN_BIT); | |
157 | ||
158 | re = xe + ye; | |
159 | rs = xs ^ ys; | |
d728f670 PB |
160 | if (flags & maddf_negate_product) |
161 | rs ^= 1; | |
e24c3bec MC |
162 | |
163 | /* shunt to top of word */ | |
164 | xm <<= 64 - (DP_FBITS + 1); | |
165 | ym <<= 64 - (DP_FBITS + 1); | |
166 | ||
167 | /* | |
95bff241 | 168 | * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. |
e24c3bec MC |
169 | */ |
170 | ||
171 | /* 32 * 32 => 64 */ | |
172 | #define DPXMULT(x, y) ((u64)(x) * (u64)y) | |
173 | ||
174 | lxm = xm; | |
175 | hxm = xm >> 32; | |
176 | lym = ym; | |
177 | hym = ym >> 32; | |
178 | ||
179 | lrm = DPXMULT(lxm, lym); | |
180 | hrm = DPXMULT(hxm, hym); | |
181 | ||
182 | t = DPXMULT(lxm, hym); | |
183 | ||
184 | at = lrm + (t << 32); | |
185 | hrm += at < lrm; | |
186 | lrm = at; | |
187 | ||
188 | hrm = hrm + (t >> 32); | |
189 | ||
190 | t = DPXMULT(hxm, lym); | |
191 | ||
192 | at = lrm + (t << 32); | |
193 | hrm += at < lrm; | |
194 | lrm = at; | |
195 | ||
196 | hrm = hrm + (t >> 32); | |
197 | ||
198 | rm = hrm | (lrm != 0); | |
199 | ||
200 | /* | |
201 | * Sticky shift down to normal rounding precision. | |
202 | */ | |
203 | if ((s64) rm < 0) { | |
204 | rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | | |
205 | ((rm << (DP_FBITS + 1 + 3)) != 0); | |
5c18c936 | 206 | re++; |
e24c3bec MC |
207 | } else { |
208 | rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | | |
209 | ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); | |
210 | } | |
211 | assert(rm & (DP_HIDDEN_BIT << 3)); | |
212 | ||
213 | /* And now the addition */ | |
214 | assert(zm & DP_HIDDEN_BIT); | |
215 | ||
216 | /* | |
217 | * Provide guard,round and stick bit space. | |
218 | */ | |
219 | zm <<= 3; | |
220 | ||
221 | if (ze > re) { | |
222 | /* | |
223 | * Have to shift y fraction right to align. | |
224 | */ | |
225 | s = ze - re; | |
226 | rm = XDPSRS(rm, s); | |
227 | re += s; | |
228 | } else if (re > ze) { | |
229 | /* | |
230 | * Have to shift x fraction right to align. | |
231 | */ | |
232 | s = re - ze; | |
233 | zm = XDPSRS(zm, s); | |
234 | ze += s; | |
235 | } | |
236 | assert(ze == re); | |
237 | assert(ze <= DP_EMAX); | |
238 | ||
239 | if (zs == rs) { | |
240 | /* | |
241 | * Generate 28 bit result of adding two 27 bit numbers | |
242 | * leaving result in xm, xs and xe. | |
243 | */ | |
244 | zm = zm + rm; | |
245 | ||
246 | if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ | |
247 | zm = XDPSRS1(zm); | |
248 | ze++; | |
249 | } | |
250 | } else { | |
251 | if (zm >= rm) { | |
252 | zm = zm - rm; | |
253 | } else { | |
254 | zm = rm - zm; | |
255 | zs = rs; | |
256 | } | |
257 | if (zm == 0) | |
258 | return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); | |
259 | ||
260 | /* | |
261 | * Normalize to rounding precision. | |
262 | */ | |
263 | while ((zm >> (DP_FBITS + 3)) == 0) { | |
264 | zm <<= 1; | |
265 | ze--; | |
266 | } | |
267 | } | |
268 | ||
269 | return ieee754dp_format(zs, ze, zm); | |
270 | } | |
d728f670 PB |
271 | |
272 | union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, | |
273 | union ieee754dp y) | |
274 | { | |
275 | return _dp_maddf(z, x, y, 0); | |
276 | } | |
277 | ||
278 | union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, | |
279 | union ieee754dp y) | |
280 | { | |
281 | return _dp_maddf(z, x, y, maddf_negate_product); | |
282 | } |