aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips
diff options
context:
space:
mode:
author Paul Burton <paul.burton@imgtec.com> 2016-04-21 09:04:49 -0400
committer Ralf Baechle <ralf@linux-mips.org> 2016-05-13 08:02:22 -0400
commit 6162051e87f6ea785cb51ad99bdcf8eb0bd9cb07 (patch)
tree dcbed0ce8143e823e3cd3ba8db3810b89861705e /arch/mips
parent 4b820d95dc53c15e6e727da964430a3ed60e05ef (diff)
MIPS: math-emu: Unify ieee754sp_m{add,sub}f
The code for emulating the MIPSr6 maddf.s & msubf.s instructions has previously been implemented as 2 different functions, namely ieee754sp_maddf & ieee754sp_msubf. The only difference in behaviour between these 2 instructions is the sign of the product, so we can easily share the code implementing them. Do this for the single precision variant, removing the original ieee754sp_msubf implementation in favor of reusing the code from ieee754sp_maddf. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13154/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips')
-rw-r--r--arch/mips/math-emu/Makefile2
-rw-r--r--arch/mips/math-emu/sp_maddf.c22
-rw-r--r--arch/mips/math-emu/sp_msubf.c258
3 files changed, 21 insertions, 261 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile
index a19641d3ac23..3389aff21783 100644
--- a/arch/mips/math-emu/Makefile
+++ b/arch/mips/math-emu/Makefile
@@ -6,7 +6,7 @@ obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \
6 dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ 6 dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \
7 dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \ 7 dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \
8 sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ 8 sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \
9 sp_tint.o sp_fint.o sp_maddf.o sp_msubf.o sp_2008class.o sp_fmin.o sp_fmax.o \ 9 sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \
10 dsemul.o 10 dsemul.o
11 11
12lib-y += ieee754d.o \ 12lib-y += ieee754d.o \
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index dd1dd83e34eb..93b7132d60e2 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -14,8 +14,12 @@
14 14
15#include "ieee754sp.h" 15#include "ieee754sp.h"
16 16
17union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, 17enum maddf_flags {
18 union ieee754sp y) 18 maddf_negate_product = 1 << 0,
19};
20
21static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
22 union ieee754sp y, enum maddf_flags flags)
19{ 23{
20 int re; 24 int re;
21 int rs; 25 int rs;
@@ -154,6 +158,8 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
154 158
155 re = xe + ye; 159 re = xe + ye;
156 rs = xs ^ ys; 160 rs = xs ^ ys;
161 if (flags & maddf_negate_product)
162 rs ^= 1;
157 163
158 /* shunt to top of word */ 164 /* shunt to top of word */
159 xm <<= 32 - (SP_FBITS + 1); 165 xm <<= 32 - (SP_FBITS + 1);
@@ -253,3 +259,15 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
253 } 259 }
254 return ieee754sp_format(zs, ze, zm); 260 return ieee754sp_format(zs, ze, zm);
255} 261}
262
263union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
264 union ieee754sp y)
265{
266 return _sp_maddf(z, x, y, 0);
267}
268
269union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
270 union ieee754sp y)
271{
272 return _sp_maddf(z, x, y, maddf_negate_product);
273}
diff --git a/arch/mips/math-emu/sp_msubf.c b/arch/mips/math-emu/sp_msubf.c
deleted file mode 100644
index 81c38b980d69..000000000000
--- a/arch/mips/math-emu/sp_msubf.c
+++ /dev/null
@@ -1,258 +0,0 @@
1/*
2 * IEEE754 floating point arithmetic
3 * single precision: MSUB.f (Fused Multiply Subtract)
4 * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
5 *
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
9 *
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
13 */
14
15#include "ieee754sp.h"
16
17union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
18 union ieee754sp y)
19{
20 int re;
21 int rs;
22 unsigned rm;
23 unsigned short lxm;
24 unsigned short hxm;
25 unsigned short lym;
26 unsigned short hym;
27 unsigned lrm;
28 unsigned hrm;
29 unsigned t;
30 unsigned at;
31 int s;
32
33 COMPXSP;
34 COMPYSP;
35 u32 zm; int ze; int zs __maybe_unused; int zc;
36
37 EXPLODEXSP;
38 EXPLODEYSP;
39 EXPLODESP(z, zc, zs, ze, zm)
40
41 FLUSHXSP;
42 FLUSHYSP;
43 FLUSHSP(z, zc, zs, ze, zm);
44
45 ieee754_clearcx();
46
47 switch (zc) {
48 case IEEE754_CLASS_SNAN:
49 ieee754_setcx(IEEE754_INVALID_OPERATION);
50 return ieee754sp_nanxcpt(z);
51 case IEEE754_CLASS_DNORM:
52 SPDNORMx(zm, ze);
53 /* QNAN is handled separately below */
54 }
55
56 switch (CLPAIR(xc, yc)) {
57 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
58 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
59 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
60 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
61 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
62 return ieee754sp_nanxcpt(y);
63
64 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
65 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
66 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
67 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
70 return ieee754sp_nanxcpt(x);
71
72 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
73 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
74 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
75 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
76 return y;
77
78 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
79 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
80 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
81 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
83 return x;
84
85 /*
86 * Infinity handling
87 */
88 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
89 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
90 if (zc == IEEE754_CLASS_QNAN)
91 return z;
92 ieee754_setcx(IEEE754_INVALID_OPERATION);
93 return ieee754sp_indef();
94
95 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
96 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
97 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
98 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
99 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
100 if (zc == IEEE754_CLASS_QNAN)
101 return z;
102 return ieee754sp_inf(xs ^ ys);
103
104 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
105 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
106 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
107 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
108 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
109 if (zc == IEEE754_CLASS_INF)
110 return ieee754sp_inf(zs);
111 /* Multiplication is 0 so just return z */
112 return z;
113
114 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
115 SPDNORMX;
116
117 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
118 if (zc == IEEE754_CLASS_QNAN)
119 return z;
120 else if (zc == IEEE754_CLASS_INF)
121 return ieee754sp_inf(zs);
122 SPDNORMY;
123 break;
124
125 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
126 if (zc == IEEE754_CLASS_QNAN)
127 return z;
128 else if (zc == IEEE754_CLASS_INF)
129 return ieee754sp_inf(zs);
130 SPDNORMX;
131 break;
132
133 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
134 if (zc == IEEE754_CLASS_QNAN)
135 return z;
136 else if (zc == IEEE754_CLASS_INF)
137 return ieee754sp_inf(zs);
138 /* fall through to real computation */
139 }
140
141 /* Finally get to do some computation */
142
143 /*
144 * Do the multiplication bit first
145 *
146 * rm = xm * ym, re = xe + ye basically
147 *
148 * At this point xm and ym should have been normalized.
149 */
150
151 /* rm = xm * ym, re = xe+ye basically */
152 assert(xm & SP_HIDDEN_BIT);
153 assert(ym & SP_HIDDEN_BIT);
154
155 re = xe + ye;
156 rs = xs ^ ys;
157
158 /* shunt to top of word */
159 xm <<= 32 - (SP_FBITS + 1);
160 ym <<= 32 - (SP_FBITS + 1);
161
162 /*
163 * Multiply 32 bits xm, ym to give high 32 bits rm with stickiness.
164 */
165 lxm = xm & 0xffff;
166 hxm = xm >> 16;
167 lym = ym & 0xffff;
168 hym = ym >> 16;
169
170 lrm = lxm * lym; /* 16 * 16 => 32 */
171 hrm = hxm * hym; /* 16 * 16 => 32 */
172
173 t = lxm * hym; /* 16 * 16 => 32 */
174 at = lrm + (t << 16);
175 hrm += at < lrm;
176 lrm = at;
177 hrm = hrm + (t >> 16);
178
179 t = hxm * lym; /* 16 * 16 => 32 */
180 at = lrm + (t << 16);
181 hrm += at < lrm;
182 lrm = at;
183 hrm = hrm + (t >> 16);
184
185 rm = hrm | (lrm != 0);
186
187 /*
188 * Sticky shift down to normal rounding precision.
189 */
190 if ((int) rm < 0) {
191 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
192 ((rm << (SP_FBITS + 1 + 3)) != 0);
193 re++;
194 } else {
195 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
196 ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
197 }
198 assert(rm & (SP_HIDDEN_BIT << 3));
199
200 /* And now the subtraction */
201
202 /* Flip sign of r and handle as add */
203 rs ^= 1;
204
205 assert(zm & SP_HIDDEN_BIT);
206
207 /*
208 * Provide guard,round and stick bit space.
209 */
210 zm <<= 3;
211
212 if (ze > re) {
213 /*
214 * Have to shift y fraction right to align.
215 */
216 s = ze - re;
217 SPXSRSYn(s);
218 } else if (re > ze) {
219 /*
220 * Have to shift x fraction right to align.
221 */
222 s = re - ze;
223 SPXSRSYn(s);
224 }
225 assert(ze == re);
226 assert(ze <= SP_EMAX);
227
228 if (zs == rs) {
229 /*
230 * Generate 28 bit result of adding two 27 bit numbers
231 * leaving result in zm, zs and ze.
232 */
233 zm = zm + rm;
234
235 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
236 SPXSRSX1(); /* shift preserving sticky */
237 }
238 } else {
239 if (zm >= rm) {
240 zm = zm - rm;
241 } else {
242 zm = rm - zm;
243 zs = rs;
244 }
245 if (zm == 0)
246 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
247
248 /*
249 * Normalize in extended single precision
250 */
251 while ((zm >> (SP_MBITS + 3)) == 0) {
252 zm <<= 1;
253 ze--;
254 }
255
256 }
257 return ieee754sp_format(zs, ze, zm);
258}