diff options
author | Paul Burton <paul.burton@imgtec.com> | 2016-04-21 09:04:49 -0400 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2016-05-13 08:02:22 -0400 |
commit | 6162051e87f6ea785cb51ad99bdcf8eb0bd9cb07 (patch) | |
tree | dcbed0ce8143e823e3cd3ba8db3810b89861705e /arch/mips | |
parent | 4b820d95dc53c15e6e727da964430a3ed60e05ef (diff) |
MIPS: math-emu: Unify ieee754sp_m{add,sub}f
The code for emulating MIPSr6 maddf.s & msubf.s instructions has
previously been implemented as 2 different functions, namely
ieee754sp_maddf & ieee754sp_msubf. The difference in behaviour of these
2 instructions is merely the sign of the product, so we can easily share
the code implementing them. Do this for the single precision variant,
removing the original ieee754sp_msubf in favor of reusing the code from
ieee754sp_maddf.
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/13154/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips')
-rw-r--r-- | arch/mips/math-emu/Makefile | 2 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_maddf.c | 22 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_msubf.c | 258 |
3 files changed, 21 insertions, 261 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile index a19641d3ac23..3389aff21783 100644 --- a/arch/mips/math-emu/Makefile +++ b/arch/mips/math-emu/Makefile | |||
@@ -6,7 +6,7 @@ obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ | |||
6 | dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ | 6 | dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ |
7 | dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \ | 7 | dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \ |
8 | sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ | 8 | sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ |
9 | sp_tint.o sp_fint.o sp_maddf.o sp_msubf.o sp_2008class.o sp_fmin.o sp_fmax.o \ | 9 | sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \ |
10 | dsemul.o | 10 | dsemul.o |
11 | 11 | ||
12 | lib-y += ieee754d.o \ | 12 | lib-y += ieee754d.o \ |
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index dd1dd83e34eb..93b7132d60e2 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c | |||
@@ -14,8 +14,12 @@ | |||
14 | 14 | ||
15 | #include "ieee754sp.h" | 15 | #include "ieee754sp.h" |
16 | 16 | ||
17 | union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, | 17 | enum maddf_flags { |
18 | union ieee754sp y) | 18 | maddf_negate_product = 1 << 0, |
19 | }; | ||
20 | |||
21 | static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, | ||
22 | union ieee754sp y, enum maddf_flags flags) | ||
19 | { | 23 | { |
20 | int re; | 24 | int re; |
21 | int rs; | 25 | int rs; |
@@ -154,6 +158,8 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, | |||
154 | 158 | ||
155 | re = xe + ye; | 159 | re = xe + ye; |
156 | rs = xs ^ ys; | 160 | rs = xs ^ ys; |
161 | if (flags & maddf_negate_product) | ||
162 | rs ^= 1; | ||
157 | 163 | ||
158 | /* shunt to top of word */ | 164 | /* shunt to top of word */ |
159 | xm <<= 32 - (SP_FBITS + 1); | 165 | xm <<= 32 - (SP_FBITS + 1); |
@@ -253,3 +259,15 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, | |||
253 | } | 259 | } |
254 | return ieee754sp_format(zs, ze, zm); | 260 | return ieee754sp_format(zs, ze, zm); |
255 | } | 261 | } |
262 | |||
263 | union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, | ||
264 | union ieee754sp y) | ||
265 | { | ||
266 | return _sp_maddf(z, x, y, 0); | ||
267 | } | ||
268 | |||
269 | union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, | ||
270 | union ieee754sp y) | ||
271 | { | ||
272 | return _sp_maddf(z, x, y, maddf_negate_product); | ||
273 | } | ||
diff --git a/arch/mips/math-emu/sp_msubf.c b/arch/mips/math-emu/sp_msubf.c deleted file mode 100644 index 81c38b980d69..000000000000 --- a/arch/mips/math-emu/sp_msubf.c +++ /dev/null | |||
@@ -1,258 +0,0 @@ | |||
1 | /* | ||
2 | * IEEE754 floating point arithmetic | ||
3 | * single precision: MSUB.f (Fused Multiply Subtract) | ||
4 | * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) | ||
5 | * | ||
6 | * MIPS floating point support | ||
7 | * Copyright (C) 2015 Imagination Technologies, Ltd. | ||
8 | * Author: Markos Chandras <markos.chandras@imgtec.com> | ||
9 | * | ||
10 | * This program is free software; you can distribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the | ||
12 | * Free Software Foundation; version 2 of the License. | ||
13 | */ | ||
14 | |||
15 | #include "ieee754sp.h" | ||
16 | |||
17 | union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, | ||
18 | union ieee754sp y) | ||
19 | { | ||
20 | int re; | ||
21 | int rs; | ||
22 | unsigned rm; | ||
23 | unsigned short lxm; | ||
24 | unsigned short hxm; | ||
25 | unsigned short lym; | ||
26 | unsigned short hym; | ||
27 | unsigned lrm; | ||
28 | unsigned hrm; | ||
29 | unsigned t; | ||
30 | unsigned at; | ||
31 | int s; | ||
32 | |||
33 | COMPXSP; | ||
34 | COMPYSP; | ||
35 | u32 zm; int ze; int zs __maybe_unused; int zc; | ||
36 | |||
37 | EXPLODEXSP; | ||
38 | EXPLODEYSP; | ||
39 | EXPLODESP(z, zc, zs, ze, zm) | ||
40 | |||
41 | FLUSHXSP; | ||
42 | FLUSHYSP; | ||
43 | FLUSHSP(z, zc, zs, ze, zm); | ||
44 | |||
45 | ieee754_clearcx(); | ||
46 | |||
47 | switch (zc) { | ||
48 | case IEEE754_CLASS_SNAN: | ||
49 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
50 | return ieee754sp_nanxcpt(z); | ||
51 | case IEEE754_CLASS_DNORM: | ||
52 | SPDNORMx(zm, ze); | ||
53 | /* QNAN is handled separately below */ | ||
54 | } | ||
55 | |||
56 | switch (CLPAIR(xc, yc)) { | ||
57 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): | ||
58 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): | ||
59 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): | ||
60 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): | ||
61 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): | ||
62 | return ieee754sp_nanxcpt(y); | ||
63 | |||
64 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): | ||
65 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): | ||
66 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): | ||
67 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): | ||
68 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): | ||
69 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): | ||
70 | return ieee754sp_nanxcpt(x); | ||
71 | |||
72 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): | ||
73 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): | ||
74 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): | ||
75 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): | ||
76 | return y; | ||
77 | |||
78 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): | ||
79 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): | ||
80 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): | ||
81 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): | ||
82 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): | ||
83 | return x; | ||
84 | |||
85 | /* | ||
86 | * Infinity handling | ||
87 | */ | ||
88 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): | ||
89 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): | ||
90 | if (zc == IEEE754_CLASS_QNAN) | ||
91 | return z; | ||
92 | ieee754_setcx(IEEE754_INVALID_OPERATION); | ||
93 | return ieee754sp_indef(); | ||
94 | |||
95 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): | ||
96 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): | ||
97 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): | ||
98 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): | ||
99 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): | ||
100 | if (zc == IEEE754_CLASS_QNAN) | ||
101 | return z; | ||
102 | return ieee754sp_inf(xs ^ ys); | ||
103 | |||
104 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): | ||
105 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): | ||
106 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): | ||
107 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): | ||
108 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): | ||
109 | if (zc == IEEE754_CLASS_INF) | ||
110 | return ieee754sp_inf(zs); | ||
111 | /* Multiplication is 0 so just return z */ | ||
112 | return z; | ||
113 | |||
114 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): | ||
115 | SPDNORMX; | ||
116 | |||
117 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): | ||
118 | if (zc == IEEE754_CLASS_QNAN) | ||
119 | return z; | ||
120 | else if (zc == IEEE754_CLASS_INF) | ||
121 | return ieee754sp_inf(zs); | ||
122 | SPDNORMY; | ||
123 | break; | ||
124 | |||
125 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): | ||
126 | if (zc == IEEE754_CLASS_QNAN) | ||
127 | return z; | ||
128 | else if (zc == IEEE754_CLASS_INF) | ||
129 | return ieee754sp_inf(zs); | ||
130 | SPDNORMX; | ||
131 | break; | ||
132 | |||
133 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): | ||
134 | if (zc == IEEE754_CLASS_QNAN) | ||
135 | return z; | ||
136 | else if (zc == IEEE754_CLASS_INF) | ||
137 | return ieee754sp_inf(zs); | ||
138 | /* fall through to real computation */ | ||
139 | } | ||
140 | |||
141 | /* Finally get to do some computation */ | ||
142 | |||
143 | /* | ||
144 | * Do the multiplication bit first | ||
145 | * | ||
146 | * rm = xm * ym, re = xe + ye basically | ||
147 | * | ||
148 | * At this point xm and ym should have been normalized. | ||
149 | */ | ||
150 | |||
151 | /* rm = xm * ym, re = xe+ye basically */ | ||
152 | assert(xm & SP_HIDDEN_BIT); | ||
153 | assert(ym & SP_HIDDEN_BIT); | ||
154 | |||
155 | re = xe + ye; | ||
156 | rs = xs ^ ys; | ||
157 | |||
158 | /* shunt to top of word */ | ||
159 | xm <<= 32 - (SP_FBITS + 1); | ||
160 | ym <<= 32 - (SP_FBITS + 1); | ||
161 | |||
162 | /* | ||
163 | * Multiply 32 bits xm, ym to give high 32 bits rm with stickiness. | ||
164 | */ | ||
165 | lxm = xm & 0xffff; | ||
166 | hxm = xm >> 16; | ||
167 | lym = ym & 0xffff; | ||
168 | hym = ym >> 16; | ||
169 | |||
170 | lrm = lxm * lym; /* 16 * 16 => 32 */ | ||
171 | hrm = hxm * hym; /* 16 * 16 => 32 */ | ||
172 | |||
173 | t = lxm * hym; /* 16 * 16 => 32 */ | ||
174 | at = lrm + (t << 16); | ||
175 | hrm += at < lrm; | ||
176 | lrm = at; | ||
177 | hrm = hrm + (t >> 16); | ||
178 | |||
179 | t = hxm * lym; /* 16 * 16 => 32 */ | ||
180 | at = lrm + (t << 16); | ||
181 | hrm += at < lrm; | ||
182 | lrm = at; | ||
183 | hrm = hrm + (t >> 16); | ||
184 | |||
185 | rm = hrm | (lrm != 0); | ||
186 | |||
187 | /* | ||
188 | * Sticky shift down to normal rounding precision. | ||
189 | */ | ||
190 | if ((int) rm < 0) { | ||
191 | rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | | ||
192 | ((rm << (SP_FBITS + 1 + 3)) != 0); | ||
193 | re++; | ||
194 | } else { | ||
195 | rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | | ||
196 | ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); | ||
197 | } | ||
198 | assert(rm & (SP_HIDDEN_BIT << 3)); | ||
199 | |||
200 | /* And now the subtraction */ | ||
201 | |||
202 | /* Flip sign of r and handle as add */ | ||
203 | rs ^= 1; | ||
204 | |||
205 | assert(zm & SP_HIDDEN_BIT); | ||
206 | |||
207 | /* | ||
208 | * Provide guard, round and sticky bit space. | ||
209 | */ | ||
210 | zm <<= 3; | ||
211 | |||
212 | if (ze > re) { | ||
213 | /* | ||
214 | * Have to shift y fraction right to align. | ||
215 | */ | ||
216 | s = ze - re; | ||
217 | SPXSRSYn(s); | ||
218 | } else if (re > ze) { | ||
219 | /* | ||
220 | * Have to shift x fraction right to align. | ||
221 | */ | ||
222 | s = re - ze; | ||
223 | SPXSRSYn(s); | ||
224 | } | ||
225 | assert(ze == re); | ||
226 | assert(ze <= SP_EMAX); | ||
227 | |||
228 | if (zs == rs) { | ||
229 | /* | ||
230 | * Generate 28 bit result of adding two 27 bit numbers | ||
231 | * leaving result in zm, zs and ze. | ||
232 | */ | ||
233 | zm = zm + rm; | ||
234 | |||
235 | if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ | ||
236 | SPXSRSX1(); /* shift preserving sticky */ | ||
237 | } | ||
238 | } else { | ||
239 | if (zm >= rm) { | ||
240 | zm = zm - rm; | ||
241 | } else { | ||
242 | zm = rm - zm; | ||
243 | zs = rs; | ||
244 | } | ||
245 | if (zm == 0) | ||
246 | return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); | ||
247 | |||
248 | /* | ||
249 | * Normalize in extended single precision | ||
250 | */ | ||
251 | while ((zm >> (SP_MBITS + 3)) == 0) { | ||
252 | zm <<= 1; | ||
253 | ze--; | ||
254 | } | ||
255 | |||
256 | } | ||
257 | return ieee754sp_format(zs, ze, zm); | ||
258 | } | ||