diff options
Diffstat (limited to 'arch/ppc/kernel/vecemu.c')
-rw-r--r-- | arch/ppc/kernel/vecemu.c | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/arch/ppc/kernel/vecemu.c b/arch/ppc/kernel/vecemu.c new file mode 100644 index 000000000000..604d0947cb20 --- /dev/null +++ b/arch/ppc/kernel/vecemu.c | |||
@@ -0,0 +1,345 @@ | |||
1 | /* | ||
2 | * Routines to emulate some Altivec/VMX instructions, specifically | ||
3 | * those that can trap when given denormalized operands in Java mode. | ||
4 | */ | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/errno.h> | ||
7 | #include <linux/sched.h> | ||
8 | #include <asm/ptrace.h> | ||
9 | #include <asm/processor.h> | ||
10 | #include <asm/uaccess.h> | ||
11 | |||
12 | /* Functions in vector.S */ | ||
13 | extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); | ||
14 | extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); | ||
15 | extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | ||
16 | extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | ||
17 | extern void vrefp(vector128 *dst, vector128 *src); | ||
18 | extern void vrsqrtefp(vector128 *dst, vector128 *src); | ||
19 | extern void vexptep(vector128 *dst, vector128 *src); | ||
20 | |||
/*
 * 24-bit mantissas (implicit leading 1 in bit 23) of 2^(i/8) for
 * i = 0..7.  Indexed by the top three fraction bits of the exponent
 * argument in eexp2().
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN             -> the same NaN with the quiet bit set
 *   x >= 2^8        -> +Inf
 *   x <= -2^8       -> +0
 *   |x| < 2^-23     -> 1.0 (this includes zeros and denormals)
 * Results below the normal range are returned as denormals (or 0).
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;
	unsigned long long prod;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/*
	 * Convert to a signed fixed point integer in 9.23 representation.
	 * exp <= 7 here, so the 24-bit mantissa shifted left still fits
	 * in 31 bits.
	 */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * This needs floor(), i.e. an arithmetic right shift of the
	 * (possibly negative) fixed point value.  The bias is 126, not
	 * 127, because bit 23 of the mantissa adds one to the exponent
	 * field when the two are summed below.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[((unsigned int)pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction:
	 * take the high word of (fraction * 0x172b83ff), then scale the
	 * table mantissa by that amount.  The shift is done on the
	 * unsigned bit pattern so that a negative pwr is well defined.
	 */
	prod = (unsigned long long)((unsigned int)pwr << 12) * 0x172b83ffu;
	frac = (unsigned int)(prod >> 32);
	prod = (unsigned long long)frac * mant;
	frac = (unsigned int)(prod >> 32);
	mant += frac;

	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result: shift the mantissa down, rounding */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}
86 | |||
/* High 32 bits of the unsigned 32x32-bit product a * b. */
static inline unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN            -> the same NaN with the quiet bit set
 *   +Inf           -> +Inf
 *   +0 or -0       -> -Inf
 *   x < 0, -Inf    -> default QNaN (0x7fc00000); the logarithm of a
 *                     negative number is an invalid operation
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)		/* negative, non-zero */
		return 0x7fc00000;	/* return QNaN */

	/*
	 * From here on `exp' holds the result as a signed fixed point
	 * number with 23 fraction bits, and `mant' the input mantissa
	 * normalized so that bit 23 is the leading 1.
	 */
	if (exp == 0) {
		/* denormalized; mant is non-zero here */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;
		exp = -(118 + lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Peel off the 1/2, 1/4 and 1/8 bits of the fraction by dividing
	 * the mantissa by the matching power of two.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3a);
		exp += frac;
	}

	/* convert the fixed point result back to floating point */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* normalize so that the leading 1 ends up in bit 23 */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* bit 23 of exp carries into the exponent field, hence 126 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
151 | |||
#define VSCR_SAT	1

/*
 * Convert the single-precision value with bit pattern `x', multiplied
 * by 2^scale, to a signed 32-bit integer, truncating towards zero.
 * NaNs convert to 0.  Values that do not fit saturate to the nearest
 * representable integer and set VSCR_SAT in *vscrp; a scaled value of
 * exactly -2^31 is returned without setting the flag.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	int biased = (x >> 23) & 0xff;
	int fraction = x & 0x7fffff;
	int shift, magnitude;

	/* NaN -> 0 */
	if (biased == 255 && fraction != 0)
		return 0;

	/* unbiased exponent of the scaled value */
	shift = biased - 127 + scale;

	/* magnitude below 1.0: round towards zero */
	if (shift < 0)
		return 0;

	if (shift >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		if (negative)
			return 0x80000000;
		return 0x7fffffff;
	}

	/* put the leading 1 in bit 30, then shift down to the units place */
	magnitude = ((fraction | 0x800000) << 7) >> (30 - shift);
	return negative ? -magnitude : magnitude;
}
175 | |||
176 | static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) | ||
177 | { | ||
178 | int exp; | ||
179 | unsigned int mant; | ||
180 | |||
181 | exp = (x >> 23) & 0xff; | ||
182 | mant = x & 0x7fffff; | ||
183 | if (exp == 255 && mant != 0) | ||
184 | return 0; /* NaN -> 0 */ | ||
185 | exp = exp - 127 + scale; | ||
186 | if (exp < 0) | ||
187 | return 0; /* round towards zero */ | ||
188 | if (x & 0x80000000) { | ||
189 | /* negative => saturate to 0 */ | ||
190 | *vscrp |= VSCR_SAT; | ||
191 | return 0; | ||
192 | } | ||
193 | if (exp >= 32) { | ||
194 | /* saturate */ | ||
195 | *vscrp |= VSCR_SAT; | ||
196 | return 0xffffffff; | ||
197 | } | ||
198 | mant |= 0x800000; | ||
199 | mant = (mant << 8) >> (31 - exp); | ||
200 | return mant; | ||
201 | } | ||
202 | |||
/*
 * Round to floating integer, towards 0.
 * Takes and returns single-precision bit patterns.  NaNs are returned
 * with the quiet bit set.
 */
static unsigned int rfiz(unsigned int x)
{
	const int exp = (int)((x >> 23) & 0xff) - 127;
	unsigned int fraction_bits;

	/* NaN -> make it a QNaN */
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* no fraction bits left: it's an integer already (or Inf) */
	if (exp >= 23)
		return x;

	/* |x| < 1.0 rounds to 0, keeping the sign */
	if (exp < 0)
		return x & 0x80000000;

	/* clear the mantissa bits that lie below the units place */
	fraction_bits = 0x7fffffu >> exp;
	return x & ~fraction_bits;
}
217 | |||
/*
 * Round to floating integer, towards +/- Inf (away from zero).
 * Takes and returns single-precision bit patterns.  NaNs are returned
 * with the quiet bit set; zeros are returned unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int exp = (int)((x >> 23) & 0xff) - 127;
	int fraction_bits;

	/* NaN -> make it a QNaN */
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* no fraction bits left: it's an integer already (or Inf) */
	if (exp >= 23)
		return x;

	/* +/-0 -> +/-0 */
	if ((x & 0x7fffffff) == 0)
		return x;

	/* 0 < |x| < 1.0 rounds to +/- 1.0 */
	if (exp < 0)
		return sign | 0x3f800000;

	/*
	 * Bump the magnitude up past the next integer unless it is one
	 * already, then drop the fraction.  The mantissa may overflow
	 * into the exponent - that's OK, it can't overflow into the
	 * sign bit.
	 */
	fraction_bits = 0x7fffff >> exp;
	x += fraction_bits;
	return x & ~fraction_bits;
}
238 | |||
/*
 * Round to floating integer, to nearest; ties go to the even integer.
 * Takes and returns single-precision bit patterns.  NaNs are returned
 * with the quiet bit set.
 */
static unsigned int rfin(unsigned int x)
{
	int exp;
	unsigned int frac_mask, unit, half, frac, r;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: pick the even one */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	frac_mask = 0x7fffff >> exp;	/* bits below the units place */
	unit = frac_mask + 1;		/* the units place itself */
	half = unit >> 1;		/* 0.5 in the same scale */
	frac = x & frac_mask;
	r = x & ~frac_mask;		/* magnitude truncated to an integer */

	/*
	 * Round up when above the halfway point, or exactly on it with
	 * an odd integer part.  For exp == 0 the units bit is bit 23,
	 * the low bit of the exponent field (127), so the integer part
	 * 1 correctly reads as odd.  A carry out of the mantissa into
	 * the exponent is fine; it cannot reach the sign bit.
	 */
	if (frac > half || (frac == half && (r & unit) != 0))
		r += unit;
	return r;
}
258 | |||
259 | int emulate_altivec(struct pt_regs *regs) | ||
260 | { | ||
261 | unsigned int instr, i; | ||
262 | unsigned int va, vb, vc, vd; | ||
263 | vector128 *vrs; | ||
264 | |||
265 | if (get_user(instr, (unsigned int __user *) regs->nip)) | ||
266 | return -EFAULT; | ||
267 | if ((instr >> 26) != 4) | ||
268 | return -EINVAL; /* not an altivec instruction */ | ||
269 | vd = (instr >> 21) & 0x1f; | ||
270 | va = (instr >> 16) & 0x1f; | ||
271 | vb = (instr >> 11) & 0x1f; | ||
272 | vc = (instr >> 6) & 0x1f; | ||
273 | |||
274 | vrs = current->thread.vr; | ||
275 | switch (instr & 0x3f) { | ||
276 | case 10: | ||
277 | switch (vc) { | ||
278 | case 0: /* vaddfp */ | ||
279 | vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); | ||
280 | break; | ||
281 | case 1: /* vsubfp */ | ||
282 | vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); | ||
283 | break; | ||
284 | case 4: /* vrefp */ | ||
285 | vrefp(&vrs[vd], &vrs[vb]); | ||
286 | break; | ||
287 | case 5: /* vrsqrtefp */ | ||
288 | vrsqrtefp(&vrs[vd], &vrs[vb]); | ||
289 | break; | ||
290 | case 6: /* vexptefp */ | ||
291 | for (i = 0; i < 4; ++i) | ||
292 | vrs[vd].u[i] = eexp2(vrs[vb].u[i]); | ||
293 | break; | ||
294 | case 7: /* vlogefp */ | ||
295 | for (i = 0; i < 4; ++i) | ||
296 | vrs[vd].u[i] = elog2(vrs[vb].u[i]); | ||
297 | break; | ||
298 | case 8: /* vrfin */ | ||
299 | for (i = 0; i < 4; ++i) | ||
300 | vrs[vd].u[i] = rfin(vrs[vb].u[i]); | ||
301 | break; | ||
302 | case 9: /* vrfiz */ | ||
303 | for (i = 0; i < 4; ++i) | ||
304 | vrs[vd].u[i] = rfiz(vrs[vb].u[i]); | ||
305 | break; | ||
306 | case 10: /* vrfip */ | ||
307 | for (i = 0; i < 4; ++i) { | ||
308 | u32 x = vrs[vb].u[i]; | ||
309 | x = (x & 0x80000000)? rfiz(x): rfii(x); | ||
310 | vrs[vd].u[i] = x; | ||
311 | } | ||
312 | break; | ||
313 | case 11: /* vrfim */ | ||
314 | for (i = 0; i < 4; ++i) { | ||
315 | u32 x = vrs[vb].u[i]; | ||
316 | x = (x & 0x80000000)? rfii(x): rfiz(x); | ||
317 | vrs[vd].u[i] = x; | ||
318 | } | ||
319 | break; | ||
320 | case 14: /* vctuxs */ | ||
321 | for (i = 0; i < 4; ++i) | ||
322 | vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, | ||
323 | ¤t->thread.vscr.u[3]); | ||
324 | break; | ||
325 | case 15: /* vctsxs */ | ||
326 | for (i = 0; i < 4; ++i) | ||
327 | vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, | ||
328 | ¤t->thread.vscr.u[3]); | ||
329 | break; | ||
330 | default: | ||
331 | return -EINVAL; | ||
332 | } | ||
333 | break; | ||
334 | case 46: /* vmaddfp */ | ||
335 | vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | ||
336 | break; | ||
337 | case 47: /* vnmsubfp */ | ||
338 | vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | ||
339 | break; | ||
340 | default: | ||
341 | return -EINVAL; | ||
342 | } | ||
343 | |||
344 | return 0; | ||
345 | } | ||