Commit | Line | Data |
---|---|---|
e24c3bec MC |
1 | /* |
2 | * IEEE754 floating point arithmetic | |
3 | * single precision: MADDF.f (Fused Multiply Add) | |
4 | * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) | |
5 | * | |
6 | * MIPS floating point support | |
7 | * Copyright (C) 2015 Imagination Technologies, Ltd. | |
8 | * Author: Markos Chandras <markos.chandras@imgtec.com> | |
9 | * | |
10 | * This program is free software; you can distribute it and/or modify it | |
11 | * under the terms of the GNU General Public License as published by the | |
12 | * Free Software Foundation; version 2 of the License. | |
13 | */ | |
14 | ||
15 | #include "ieee754sp.h" | |
16 | ||
6162051e PB |
17 | enum maddf_flags { |
18 | maddf_negate_product = 1 << 0, | |
19 | }; | |
20 | ||
21 | static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, | |
22 | union ieee754sp y, enum maddf_flags flags) | |
e24c3bec MC |
23 | { |
24 | int re; | |
25 | int rs; | |
26 | unsigned rm; | |
27 | unsigned short lxm; | |
28 | unsigned short hxm; | |
29 | unsigned short lym; | |
30 | unsigned short hym; | |
31 | unsigned lrm; | |
32 | unsigned hrm; | |
33 | unsigned t; | |
34 | unsigned at; | |
35 | int s; | |
36 | ||
37 | COMPXSP; | |
38 | COMPYSP; | |
e2d11e1a | 39 | COMPZSP; |
e24c3bec MC |
40 | |
41 | EXPLODEXSP; | |
42 | EXPLODEYSP; | |
e2d11e1a | 43 | EXPLODEZSP; |
e24c3bec MC |
44 | |
45 | FLUSHXSP; | |
46 | FLUSHYSP; | |
e2d11e1a | 47 | FLUSHZSP; |
e24c3bec MC |
48 | |
49 | ieee754_clearcx(); | |
50 | ||
51 | switch (zc) { | |
52 | case IEEE754_CLASS_SNAN: | |
53 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
54 | return ieee754sp_nanxcpt(z); | |
55 | case IEEE754_CLASS_DNORM: | |
e2d11e1a | 56 | SPDNORMZ; |
e24c3bec MC |
57 | /* QNAN is handled separately below */ |
58 | } | |
59 | ||
60 | switch (CLPAIR(xc, yc)) { | |
61 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): | |
62 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): | |
63 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): | |
64 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): | |
65 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): | |
66 | return ieee754sp_nanxcpt(y); | |
67 | ||
68 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): | |
69 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): | |
70 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): | |
71 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): | |
72 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): | |
73 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): | |
74 | return ieee754sp_nanxcpt(x); | |
75 | ||
76 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): | |
77 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): | |
78 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): | |
79 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): | |
80 | return y; | |
81 | ||
82 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): | |
83 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): | |
84 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): | |
85 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): | |
86 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): | |
87 | return x; | |
88 | ||
89 | /* | |
90 | * Infinity handling | |
91 | */ | |
92 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): | |
93 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): | |
94 | if (zc == IEEE754_CLASS_QNAN) | |
95 | return z; | |
96 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
97 | return ieee754sp_indef(); | |
98 | ||
99 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): | |
100 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): | |
101 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): | |
102 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): | |
103 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): | |
104 | if (zc == IEEE754_CLASS_QNAN) | |
105 | return z; | |
106 | return ieee754sp_inf(xs ^ ys); | |
107 | ||
108 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): | |
109 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): | |
110 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): | |
111 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): | |
112 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): | |
113 | if (zc == IEEE754_CLASS_INF) | |
114 | return ieee754sp_inf(zs); | |
115 | /* Multiplication is 0 so just return z */ | |
116 | return z; | |
117 | ||
118 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): | |
119 | SPDNORMX; | |
120 | ||
121 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): | |
122 | if (zc == IEEE754_CLASS_QNAN) | |
123 | return z; | |
124 | else if (zc == IEEE754_CLASS_INF) | |
125 | return ieee754sp_inf(zs); | |
126 | SPDNORMY; | |
127 | break; | |
128 | ||
129 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): | |
130 | if (zc == IEEE754_CLASS_QNAN) | |
131 | return z; | |
132 | else if (zc == IEEE754_CLASS_INF) | |
133 | return ieee754sp_inf(zs); | |
134 | SPDNORMX; | |
135 | break; | |
136 | ||
137 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): | |
138 | if (zc == IEEE754_CLASS_QNAN) | |
139 | return z; | |
140 | else if (zc == IEEE754_CLASS_INF) | |
141 | return ieee754sp_inf(zs); | |
142 | /* fall through to real computations */ | |
143 | } | |
144 | ||
145 | /* Finally get to do some computation */ | |
146 | ||
147 | /* | |
148 | * Do the multiplication bit first | |
149 | * | |
150 | * rm = xm * ym, re = xe + ye basically | |
151 | * | |
152 | * At this point xm and ym should have been normalized. | |
153 | */ | |
154 | ||
155 | /* rm = xm * ym, re = xe+ye basically */ | |
156 | assert(xm & SP_HIDDEN_BIT); | |
157 | assert(ym & SP_HIDDEN_BIT); | |
158 | ||
159 | re = xe + ye; | |
160 | rs = xs ^ ys; | |
6162051e PB |
161 | if (flags & maddf_negate_product) |
162 | rs ^= 1; | |
e24c3bec MC |
163 | |
164 | /* shunt to top of word */ | |
165 | xm <<= 32 - (SP_FBITS + 1); | |
166 | ym <<= 32 - (SP_FBITS + 1); | |
167 | ||
168 | /* | |
169 | * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. | |
170 | */ | |
171 | lxm = xm & 0xffff; | |
172 | hxm = xm >> 16; | |
173 | lym = ym & 0xffff; | |
174 | hym = ym >> 16; | |
175 | ||
176 | lrm = lxm * lym; /* 16 * 16 => 32 */ | |
177 | hrm = hxm * hym; /* 16 * 16 => 32 */ | |
178 | ||
179 | t = lxm * hym; /* 16 * 16 => 32 */ | |
180 | at = lrm + (t << 16); | |
181 | hrm += at < lrm; | |
182 | lrm = at; | |
183 | hrm = hrm + (t >> 16); | |
184 | ||
185 | t = hxm * lym; /* 16 * 16 => 32 */ | |
186 | at = lrm + (t << 16); | |
187 | hrm += at < lrm; | |
188 | lrm = at; | |
189 | hrm = hrm + (t >> 16); | |
190 | ||
191 | rm = hrm | (lrm != 0); | |
192 | ||
193 | /* | |
194 | * Sticky shift down to normal rounding precision. | |
195 | */ | |
196 | if ((int) rm < 0) { | |
197 | rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | | |
198 | ((rm << (SP_FBITS + 1 + 3)) != 0); | |
199 | re++; | |
200 | } else { | |
201 | rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | | |
202 | ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); | |
203 | } | |
204 | assert(rm & (SP_HIDDEN_BIT << 3)); | |
205 | ||
206 | /* And now the addition */ | |
207 | ||
208 | assert(zm & SP_HIDDEN_BIT); | |
209 | ||
210 | /* | |
211 | * Provide guard,round and stick bit space. | |
212 | */ | |
213 | zm <<= 3; | |
214 | ||
215 | if (ze > re) { | |
216 | /* | |
db57f29d | 217 | * Have to shift r fraction right to align. |
e24c3bec MC |
218 | */ |
219 | s = ze - re; | |
db57f29d PB |
220 | rm = XSPSRS(rm, s); |
221 | re += s; | |
e24c3bec MC |
222 | } else if (re > ze) { |
223 | /* | |
db57f29d | 224 | * Have to shift z fraction right to align. |
e24c3bec MC |
225 | */ |
226 | s = re - ze; | |
db57f29d PB |
227 | zm = XSPSRS(zm, s); |
228 | ze += s; | |
e24c3bec MC |
229 | } |
230 | assert(ze == re); | |
231 | assert(ze <= SP_EMAX); | |
232 | ||
233 | if (zs == rs) { | |
234 | /* | |
235 | * Generate 28 bit result of adding two 27 bit numbers | |
236 | * leaving result in zm, zs and ze. | |
237 | */ | |
238 | zm = zm + rm; | |
239 | ||
240 | if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ | |
db57f29d PB |
241 | zm = XSPSRS1(zm); |
242 | ze++; | |
e24c3bec MC |
243 | } |
244 | } else { | |
245 | if (zm >= rm) { | |
246 | zm = zm - rm; | |
247 | } else { | |
248 | zm = rm - zm; | |
249 | zs = rs; | |
250 | } | |
251 | if (zm == 0) | |
252 | return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); | |
253 | ||
254 | /* | |
255 | * Normalize in extended single precision | |
256 | */ | |
257 | while ((zm >> (SP_MBITS + 3)) == 0) { | |
258 | zm <<= 1; | |
259 | ze--; | |
260 | } | |
261 | ||
262 | } | |
263 | return ieee754sp_format(zs, ze, zm); | |
264 | } | |
6162051e PB |
265 | |
266 | union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, | |
267 | union ieee754sp y) | |
268 | { | |
269 | return _sp_maddf(z, x, y, 0); | |
270 | } | |
271 | ||
272 | union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, | |
273 | union ieee754sp y) | |
274 | { | |
275 | return _sp_maddf(z, x, y, maddf_negate_product); | |
276 | } |