diff options
Diffstat (limited to 'arch/arm/vfp')
-rw-r--r-- | arch/arm/vfp/Makefile | 12 | ||||
-rw-r--r-- | arch/arm/vfp/entry.S | 45 | ||||
-rw-r--r-- | arch/arm/vfp/vfp.h | 344 | ||||
-rw-r--r-- | arch/arm/vfp/vfpdouble.c | 1186 | ||||
-rw-r--r-- | arch/arm/vfp/vfphw.S | 215 | ||||
-rw-r--r-- | arch/arm/vfp/vfpinstr.h | 88 | ||||
-rw-r--r-- | arch/arm/vfp/vfpmodule.c | 288 | ||||
-rw-r--r-- | arch/arm/vfp/vfpsingle.c | 1224 |
8 files changed, 3402 insertions, 0 deletions
diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile new file mode 100644 index 000000000000..afabac31dd1d --- /dev/null +++ b/arch/arm/vfp/Makefile | |||
@@ -0,0 +1,12 @@ | |||
1 | # | ||
2 | # linux/arch/arm/vfp/Makefile | ||
3 | # | ||
4 | # Copyright (C) 2001 ARM Limited | ||
5 | # | ||
6 | |||
7 | # EXTRA_CFLAGS := -DDEBUG | ||
8 | # EXTRA_AFLAGS := -DDEBUG | ||
9 | |||
10 | obj-y += vfp.o | ||
11 | |||
12 | vfp-$(CONFIG_VFP) += entry.o vfpmodule.o vfphw.o vfpsingle.o vfpdouble.o | ||
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S new file mode 100644 index 000000000000..e73c8deca592 --- /dev/null +++ b/arch/arm/vfp/entry.S | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/vfp/entry.S | ||
3 | * | ||
4 | * Copyright (C) 2004 ARM Limited. | ||
5 | * Written by Deep Blue Solutions Limited. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * Basic entry code, called from the kernel's undefined instruction trap. | ||
12 | * r0 = faulted instruction | ||
13 | * r5 = faulted PC+4 | ||
14 | * r9 = successful return | ||
15 | * r10 = thread_info structure | ||
16 | * lr = failure return | ||
17 | */ | ||
18 | #include <linux/linkage.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <asm/constants.h> | ||
21 | #include <asm/vfpmacros.h> | ||
22 | |||
23 | .globl do_vfp | ||
24 | do_vfp: | ||
25 | ldr r4, .LCvfp @ r4 = address of vfp_vector (from the literal below) | ||
26 | add r10, r10, #TI_VFPSTATE @ r10 = workspace | ||
27 | ldr pc, [r4] @ call VFP entry point | ||
28 | |||
29 | .LCvfp: | ||
30 | .word vfp_vector @ literal holding the address of vfp_vector | ||
31 | |||
32 | @ This code is called if the VFP does not exist. It needs to flag the | ||
33 | @ failure to the VFP initialisation code. | ||
34 | |||
35 | __INIT | ||
36 | .globl vfp_testing_entry | ||
37 | vfp_testing_entry: | ||
38 | ldr r0, VFP_arch_address @ r0 = address of VFP_arch (from the literal below) | ||
39 | str r5, [r0] @ known non-zero value | ||
40 | mov pc, r9 @ we have handled the fault | ||
41 | |||
42 | VFP_arch_address: | ||
43 | .word VFP_arch @ literal holding the address of VFP_arch | ||
44 | |||
45 | __FINIT | ||
diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h new file mode 100644 index 000000000000..55a02bc994a3 --- /dev/null +++ b/arch/arm/vfp/vfp.h | |||
@@ -0,0 +1,344 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/vfp/vfp.h | ||
3 | * | ||
4 | * Copyright (C) 2004 ARM Limited. | ||
5 | * Written by Deep Blue Solutions Limited. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
/*
 * Shift a 32-bit value right by 'shift' bits.  Any non-zero bits that
 * fall off the bottom are "jammed" into bit 0 of the result, so the
 * caller can still tell that the shifted value was inexact.
 */
static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
{
	u32 lost;

	if (shift == 0)
		return val;

	/* everything is shifted out: only the sticky bit can survive */
	if (shift >= 32)
		return val != 0;

	lost = val << (32 - shift);
	return (val >> shift) | (lost != 0);
}
22 | |||
/*
 * 64-bit variant of the jamming right shift: shift 'val' right by
 * 'shift' bits and OR a 1 into bit 0 if any non-zero bit was lost.
 */
static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
{
	u64 lost;

	if (shift == 0)
		return val;

	/* everything is shifted out: only the sticky bit can survive */
	if (shift >= 64)
		return val != 0;

	lost = val << (64 - shift);
	return (val >> shift) | (lost != 0);
}
33 | |||
34 | static inline u32 vfp_hi64to32jamming(u64 val) | ||
35 | { | ||
36 | u32 v; | ||
37 | |||
38 | asm( | ||
39 | "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t" | ||
40 | "movcc %0, %R1\n\t" | ||
41 | "orrcs %0, %R1, #1" | ||
42 | : "=r" (v) : "r" (val) : "cc"); | ||
43 | |||
44 | return v; | ||
45 | } | ||
46 | |||
47 | static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | ||
48 | { | ||
49 | asm( "adds %Q0, %Q2, %Q4\n\t" | ||
50 | "adcs %R0, %R2, %R4\n\t" | ||
51 | "adcs %Q1, %Q3, %Q5\n\t" | ||
52 | "adc %R1, %R3, %R5" | ||
53 | : "=r" (nl), "=r" (nh) | ||
54 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) | ||
55 | : "cc"); | ||
56 | *resh = nh; | ||
57 | *resl = nl; | ||
58 | } | ||
59 | |||
60 | static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | ||
61 | { | ||
62 | asm( "subs %Q0, %Q2, %Q4\n\t" | ||
63 | "sbcs %R0, %R2, %R4\n\t" | ||
64 | "sbcs %Q1, %Q3, %Q5\n\t" | ||
65 | "sbc %R1, %R3, %R5\n\t" | ||
66 | : "=r" (nl), "=r" (nh) | ||
67 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) | ||
68 | : "cc"); | ||
69 | *resh = nh; | ||
70 | *resl = nl; | ||
71 | } | ||
72 | |||
/*
 * Full 64x64 -> 128 bit unsigned multiply: (*resh:*resl) = n * m.
 *
 * Both operands are split into 32-bit halves and the four partial
 * products are combined by hand, tracking the carries out of each
 * 64-bit addition explicitly.
 */
static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
{
	u32 n_lo = n;
	u32 n_hi = n >> 32;
	u32 m_lo = m;
	u32 m_hi = m >> 32;
	u64 low = (u64)n_lo * m_lo;
	u64 high = (u64)n_hi * m_hi;
	u64 cross_b = (u64)n_lo * m_hi;
	u64 cross = (u64)n_hi * m_lo + cross_b;
	u64 cross_low;

	/* a carry out of the cross-term sum is worth 2^96: bit 32 of 'high' */
	if (cross < cross_b)
		high += 1ULL << 32;
	high += cross >> 32;

	/* fold the low half of the cross terms into 'low', carrying into 'high' */
	cross_low = cross << 32;
	low += cross_low;
	if (low < cross_low)
		high += 1;

	*resl = low;
	*resh = high;
}
99 | |||
/*
 * Double a 64-bit value into a 128-bit result:
 * (*resh:*resl) = n << 1.
 */
static inline void shift64left(u64 *resh, u64 *resl, u64 n)
{
	u64 carry_out = n >> 63;
	u64 doubled = n << 1;

	*resh = carry_out;
	*resl = doubled;
}
105 | |||
106 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) | ||
107 | { | ||
108 | u64 rh, rl; | ||
109 | mul64to128(&rh, &rl, n, m); | ||
110 | return rh | (rl != 0); | ||
111 | } | ||
112 | |||
113 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) | ||
114 | { | ||
115 | u64 mh, ml, remh, reml, termh, terml, z; | ||
116 | |||
117 | if (nh >= m) | ||
118 | return ~0ULL; | ||
119 | mh = m >> 32; | ||
120 | z = (mh << 32 <= nh) ? 0xffffffff00000000ULL : (nh / mh) << 32; | ||
121 | mul64to128(&termh, &terml, m, z); | ||
122 | sub128(&remh, &reml, nh, nl, termh, terml); | ||
123 | ml = m << 32; | ||
124 | while ((s64)remh < 0) { | ||
125 | z -= 0x100000000ULL; | ||
126 | add128(&remh, &reml, remh, reml, mh, ml); | ||
127 | } | ||
128 | remh = (remh << 32) | (reml >> 32); | ||
129 | z |= (mh << 32 <= remh) ? 0xffffffff : remh / mh; | ||
130 | return z; | ||
131 | } | ||
132 | |||
/*
 * Operations on unpacked elements
 *
 * The unpacked sign is 0x8000 for negative and 0 for positive values;
 * negation flips that bit.  The argument is parenthesised so that
 * expressions such as vfp_sign_negate(a | b) expand correctly.
 */
#define vfp_sign_negate(sign)	((sign) ^ 0x8000)
137 | |||
138 | /* | ||
139 | * Single-precision | ||
140 | */ | ||
141 | struct vfp_single { | ||
142 | s16 exponent; /* biased exponent field (0..255 when freshly unpacked) */ | ||
143 | u16 sign; /* 0x8000 if negative, 0 if positive */ | ||
144 | u32 significand; /* mantissa magnitude, leading 1. (if any) at bit 30 */ | ||
145 | }; | ||
146 | |||
147 | extern s32 vfp_get_float(unsigned int reg); | ||
148 | extern void vfp_put_float(unsigned int reg, s32 val); | ||
149 | |||
150 | /* | ||
151 | * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa | ||
152 | * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent | ||
153 | * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand | ||
154 | * which are not propagated to the float upon packing. | ||
155 | */ | ||
156 | #define VFP_SINGLE_MANTISSA_BITS (23) | ||
157 | #define VFP_SINGLE_EXPONENT_BITS (8) | ||
158 | #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) | ||
159 | #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) | ||
160 | |||
161 | /* | ||
162 | * The bit in an unpacked float which indicates that it is a quiet NaN | ||
163 | */ | ||
164 | #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) | ||
165 | |||
166 | /* | ||
167 | * Operations on packed single-precision numbers | ||
168 | */ | ||
169 | #define vfp_single_packed_sign(v) ((v) & 0x80000000) | ||
170 | #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) | ||
171 | #define vfp_single_packed_abs(v) ((v) & ~0x80000000) | ||
172 | #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) | ||
173 | #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) | ||
174 | |||
175 | /* | ||
176 | * Unpack a single-precision float. Note that this returns the magnitude | ||
177 | * of the single-precision float mantissa with the 1. if necessary, | ||
178 | * aligned to bit 30. | ||
179 | */ | ||
180 | static inline void vfp_single_unpack(struct vfp_single *s, s32 val) | ||
181 | { | ||
182 | u32 significand; | ||
183 | |||
184 | s->sign = vfp_single_packed_sign(val) >> 16, | ||
185 | s->exponent = vfp_single_packed_exponent(val); | ||
186 | |||
187 | significand = (u32) val; | ||
188 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; | ||
189 | if (s->exponent && s->exponent != 255) | ||
190 | significand |= 0x40000000; | ||
191 | s->significand = significand; | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * Re-pack a single-precision float. This assumes that the float is | ||
196 | * already normalised such that the MSB is bit 30, _not_ bit 31. | ||
197 | */ | ||
198 | static inline s32 vfp_single_pack(struct vfp_single *s) | ||
199 | { | ||
200 | u32 val; | ||
201 | val = (s->sign << 16) + | ||
202 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + | ||
203 | (s->significand >> VFP_SINGLE_LOW_BITS); | ||
204 | return (s32)val; | ||
205 | } | ||
206 | |||
207 | #define VFP_NUMBER (1<<0) | ||
208 | #define VFP_ZERO (1<<1) | ||
209 | #define VFP_DENORMAL (1<<2) | ||
210 | #define VFP_INFINITY (1<<3) | ||
211 | #define VFP_NAN (1<<4) | ||
212 | #define VFP_NAN_SIGNAL (1<<5) | ||
213 | |||
214 | #define VFP_QNAN (VFP_NAN) | ||
215 | #define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) | ||
216 | |||
217 | static inline int vfp_single_type(struct vfp_single *s) | ||
218 | { | ||
219 | int type = VFP_NUMBER; | ||
220 | if (s->exponent == 255) { | ||
221 | if (s->significand == 0) | ||
222 | type = VFP_INFINITY; | ||
223 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) | ||
224 | type = VFP_QNAN; | ||
225 | else | ||
226 | type = VFP_SNAN; | ||
227 | } else if (s->exponent == 0) { | ||
228 | if (s->significand == 0) | ||
229 | type |= VFP_ZERO; | ||
230 | else | ||
231 | type |= VFP_DENORMAL; | ||
232 | } | ||
233 | return type; | ||
234 | } | ||
235 | |||
236 | #ifndef DEBUG | ||
237 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) | ||
238 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions); | ||
239 | #else | ||
240 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); | ||
241 | #endif | ||
242 | |||
243 | /* | ||
244 | * Double-precision | ||
245 | */ | ||
246 | struct vfp_double { | ||
247 | s16 exponent; /* biased exponent field (0..2047 when freshly unpacked) */ | ||
248 | u16 sign; /* 0x8000 if negative, 0 if positive */ | ||
249 | u64 significand; /* mantissa magnitude, leading 1. (if any) at bit 62 */ | ||
250 | }; | ||
251 | |||
252 | /* | ||
253 | * VFP_REG_ZERO is a special register number for vfp_get_double | ||
254 | * which returns (double)0.0. This is useful for the compare with | ||
255 | * zero instructions. | ||
256 | */ | ||
257 | #define VFP_REG_ZERO 16 | ||
258 | extern u64 vfp_get_double(unsigned int reg); | ||
259 | extern void vfp_put_double(unsigned int reg, u64 val); | ||
260 | |||
261 | #define VFP_DOUBLE_MANTISSA_BITS (52) | ||
262 | #define VFP_DOUBLE_EXPONENT_BITS (11) | ||
263 | #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) | ||
264 | #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) | ||
265 | |||
266 | /* | ||
267 | * The bit in an unpacked double which indicates that it is a quiet NaN | ||
268 | */ | ||
269 | #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) | ||
270 | |||
271 | /* | ||
272 | * Operations on packed double-precision numbers | ||
273 | */ | ||
274 | #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) | ||
275 | #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) | ||
276 | #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) | ||
277 | #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) | ||
278 | #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) | ||
279 | |||
280 | /* | ||
281 | * Unpack a double-precision float. Note that this returns the magnitude | ||
282 | * of the double-precision float mantissa with the 1. if necessary, | ||
283 | * aligned to bit 62. | ||
284 | */ | ||
285 | static inline void vfp_double_unpack(struct vfp_double *s, s64 val) | ||
286 | { | ||
287 | u64 significand; | ||
288 | |||
289 | s->sign = vfp_double_packed_sign(val) >> 48; | ||
290 | s->exponent = vfp_double_packed_exponent(val); | ||
291 | |||
292 | significand = (u64) val; | ||
293 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; | ||
294 | if (s->exponent && s->exponent != 2047) | ||
295 | significand |= (1ULL << 62); | ||
296 | s->significand = significand; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * Re-pack a double-precision float. This assumes that the float is | ||
301 | * already normalised such that the MSB is bit 30, _not_ bit 31. | ||
302 | */ | ||
303 | static inline s64 vfp_double_pack(struct vfp_double *s) | ||
304 | { | ||
305 | u64 val; | ||
306 | val = ((u64)s->sign << 48) + | ||
307 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + | ||
308 | (s->significand >> VFP_DOUBLE_LOW_BITS); | ||
309 | return (s64)val; | ||
310 | } | ||
311 | |||
312 | static inline int vfp_double_type(struct vfp_double *s) | ||
313 | { | ||
314 | int type = VFP_NUMBER; | ||
315 | if (s->exponent == 2047) { | ||
316 | if (s->significand == 0) | ||
317 | type = VFP_INFINITY; | ||
318 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) | ||
319 | type = VFP_QNAN; | ||
320 | else | ||
321 | type = VFP_SNAN; | ||
322 | } else if (s->exponent == 0) { | ||
323 | if (s->significand == 0) | ||
324 | type |= VFP_ZERO; | ||
325 | else | ||
326 | type |= VFP_DENORMAL; | ||
327 | } | ||
328 | return type; | ||
329 | } | ||
330 | |||
331 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); | ||
332 | |||
333 | /* | ||
334 | * System registers | ||
335 | */ | ||
336 | extern u32 vfp_get_sys(unsigned int reg); | ||
337 | extern void vfp_put_sys(unsigned int reg, u32 val); | ||
338 | |||
339 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); | ||
340 | |||
341 | /* | ||
342 | * A special flag to tell the normalisation code not to normalise. | ||
343 | */ | ||
344 | #define VFP_NAN_FLAG 0x100 | ||
diff --git a/arch/arm/vfp/vfpdouble.c b/arch/arm/vfp/vfpdouble.c new file mode 100644 index 000000000000..fa3053e84db5 --- /dev/null +++ b/arch/arm/vfp/vfpdouble.c | |||
@@ -0,0 +1,1186 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/vfp/vfpdouble.c | ||
3 | * | ||
4 | * This code is derived in part from John R. Hauser's SoftFloat library, which | ||
5 | * carries the following notice: | ||
6 | * | ||
7 | * =========================================================================== | ||
8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | ||
9 | * Arithmetic Package, Release 2. | ||
10 | * | ||
11 | * Written by John R. Hauser. This work was made possible in part by the | ||
12 | * International Computer Science Institute, located at Suite 600, 1947 Center | ||
13 | * Street, Berkeley, California 94704. Funding was partially provided by the | ||
14 | * National Science Foundation under grant MIP-9311980. The original version | ||
15 | * of this code was written as part of a project to build a fixed-point vector | ||
16 | * processor in collaboration with the University of California at Berkeley, | ||
17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | ||
18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | ||
19 | * arithmetic/softfloat.html'. | ||
20 | * | ||
21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | ||
22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | ||
23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | ||
24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | ||
25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | ||
26 | * | ||
27 | * Derivative works are acceptable, even for commercial purposes, so long as | ||
28 | * (1) they include prominent notice that the work is derivative, and (2) they | ||
29 | * include prominent notice akin to these three paragraphs for those parts of | ||
30 | * this code that are retained. | ||
31 | * =========================================================================== | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/bitops.h> | ||
35 | #include <asm/ptrace.h> | ||
36 | #include <asm/vfp.h> | ||
37 | |||
38 | #include "vfpinstr.h" | ||
39 | #include "vfp.h" | ||
40 | |||
41 | static struct vfp_double vfp_double_default_qnan = { /* unpacked quiet NaN returned in default-NaN mode and for invalid operations */ | ||
42 | .exponent = 2047, /* all-ones exponent: the NaN/infinity encoding */ | ||
43 | .sign = 0, | ||
44 | .significand = VFP_DOUBLE_SIGNIFICAND_QNAN, /* only the quiet bit set */ | ||
45 | }; | ||
46 | |||
47 | static void vfp_double_dump(const char *str, struct vfp_double *d) /* debug aid: log the unpacked fields of *d, tagged with str */ | ||
48 | { | ||
49 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n", | ||
50 | str, d->sign != 0, d->exponent, d->significand); /* sign is printed as 0/1 rather than the raw 0x8000 */ | ||
51 | } | ||
52 | |||
53 | static void vfp_double_normalise_denormal(struct vfp_double *vd) | ||
54 | { | ||
55 | int bits = 31 - fls(vd->significand >> 32); | ||
56 | if (bits == 31) | ||
57 | bits = 62 - fls(vd->significand); | ||
58 | |||
59 | vfp_double_dump("normalise_denormal: in", vd); | ||
60 | |||
61 | if (bits) { | ||
62 | vd->exponent -= bits - 1; | ||
63 | vd->significand <<= bits; | ||
64 | } | ||
65 | |||
66 | vfp_double_dump("normalise_denormal: out", vd); | ||
67 | } | ||
68 | |||
69 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) /* normalise, round and pack *vd into double register dd; returns the accumulated exception flags */ | ||
70 | { | ||
71 | u64 significand, incr; | ||
72 | int exponent, shift, underflow; | ||
73 | u32 rmode; | ||
74 | |||
75 | vfp_double_dump("pack: in", vd); | ||
76 | |||
77 | /* | ||
78 | * Infinities and NaNs are a special case. | ||
79 | */ | ||
80 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) | ||
81 | goto pack; | ||
82 | |||
83 | /* | ||
84 | * Special-case zero. | ||
85 | */ | ||
86 | if (vd->significand == 0) { | ||
87 | vd->exponent = 0; | ||
88 | goto pack; | ||
89 | } | ||
90 | |||
91 | exponent = vd->exponent; | ||
92 | significand = vd->significand; | ||
93 | |||
94 | shift = 32 - fls(significand >> 32); /* shift that brings the top set bit up to bit 63 */ | ||
95 | if (shift == 32) /* high word empty: measure the low word instead */ | ||
96 | shift = 64 - fls(significand); | ||
97 | if (shift) { | ||
98 | exponent -= shift; | ||
99 | significand <<= shift; | ||
100 | } | ||
101 | |||
102 | #ifdef DEBUG | ||
103 | vd->exponent = exponent; | ||
104 | vd->significand = significand; | ||
105 | vfp_double_dump("pack: normalised", vd); | ||
106 | #endif | ||
107 | |||
108 | /* | ||
109 | * Tiny number? | ||
110 | */ | ||
111 | underflow = exponent < 0; | ||
112 | if (underflow) { | ||
113 | significand = vfp_shiftright64jamming(significand, -exponent); | ||
114 | exponent = 0; | ||
115 | #ifdef DEBUG | ||
116 | vd->exponent = exponent; | ||
117 | vd->significand = significand; | ||
118 | vfp_double_dump("pack: tiny number", vd); | ||
119 | #endif | ||
120 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) /* no discarded bits set: the tiny result is exact */ | ||
121 | underflow = 0; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Select rounding increment. | ||
126 | */ | ||
127 | incr = 0; | ||
128 | rmode = fpscr & FPSCR_RMODE_MASK; | ||
129 | |||
130 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
131 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; | ||
132 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) /* lowest kept bit is even: an exact tie must not round up */ | ||
133 | incr -= 1; | ||
134 | } else if (rmode == FPSCR_ROUND_TOZERO) { | ||
135 | incr = 0; | ||
136 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) | ||
137 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; | ||
138 | |||
139 | pr_debug("VFP: rounding increment = 0x%08llx\n", incr); | ||
140 | |||
141 | /* | ||
142 | * Is our rounding going to overflow? | ||
143 | */ | ||
144 | if ((significand + incr) < significand) { | ||
145 | exponent += 1; | ||
146 | significand = (significand >> 1) | (significand & 1); /* keep the shifted-out bit sticky */ | ||
147 | incr >>= 1; | ||
148 | #ifdef DEBUG | ||
149 | vd->exponent = exponent; | ||
150 | vd->significand = significand; | ||
151 | vfp_double_dump("pack: overflow", vd); | ||
152 | #endif | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * If any of the low bits (which will be shifted out of the | ||
157 | * number) are non-zero, the result is inexact. | ||
158 | */ | ||
159 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) | ||
160 | exceptions |= FPSCR_IXC; | ||
161 | |||
162 | /* | ||
163 | * Do our rounding. | ||
164 | */ | ||
165 | significand += incr; | ||
166 | |||
167 | /* | ||
168 | * Infinity? | ||
169 | */ | ||
170 | if (exponent >= 2046) { | ||
171 | exceptions |= FPSCR_OFC | FPSCR_IXC; | ||
172 | if (incr == 0) { /* not rounding away from zero: saturate to the largest finite value */ | ||
173 | vd->exponent = 2045; | ||
174 | vd->significand = 0x7fffffffffffffffULL; | ||
175 | } else { | ||
176 | vd->exponent = 2047; /* infinity */ | ||
177 | vd->significand = 0; | ||
178 | } | ||
179 | } else { | ||
180 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) | ||
181 | exponent = 0; | ||
182 | if (exponent || significand > 0x8000000000000000ULL) | ||
183 | underflow = 0; | ||
184 | if (underflow) | ||
185 | exceptions |= FPSCR_UFC; | ||
186 | vd->exponent = exponent; | ||
187 | vd->significand = significand >> 1; /* undo the extra bit of normalisation before packing */ | ||
188 | } | ||
189 | |||
190 | pack: | ||
191 | vfp_double_dump("pack: final", vd); | ||
192 | { | ||
193 | s64 d = vfp_double_pack(vd); | ||
194 | pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func, | ||
195 | dd, d, exceptions); | ||
196 | vfp_put_double(dd, d); | ||
197 | } | ||
198 | return exceptions & ~VFP_NAN_FLAG; /* VFP_NAN_FLAG is masked out of the returned flags */ | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * Propagate the NaN, setting exceptions if it is signalling. | ||
203 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. | ||
204 | */ | ||
205 | static u32 | ||
206 | vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn, | ||
207 | struct vfp_double *vdm, u32 fpscr) | ||
208 | { | ||
209 | struct vfp_double *nan; | ||
210 | int tn, tm = 0; | ||
211 | |||
212 | tn = vfp_double_type(vdn); | ||
213 | |||
214 | if (vdm) | ||
215 | tm = vfp_double_type(vdm); | ||
216 | |||
217 | if (fpscr & FPSCR_DEFAULT_NAN) | ||
218 | /* | ||
219 | * Default NaN mode - always returns a quiet NaN | ||
220 | */ | ||
221 | nan = &vfp_double_default_qnan; | ||
222 | else { | ||
223 | /* | ||
224 | * Contemporary mode - select the first signalling | ||
225 | * NAN, or if neither are signalling, the first | ||
226 | * quiet NAN. | ||
227 | */ | ||
228 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | ||
229 | nan = vdn; | ||
230 | else | ||
231 | nan = vdm; | ||
232 | /* | ||
233 | * Make the NaN quiet. | ||
234 | */ | ||
235 | nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | ||
236 | } | ||
237 | |||
238 | *vdd = *nan; | ||
239 | |||
240 | /* | ||
241 | * If one was a signalling NAN, raise invalid operation. | ||
242 | */ | ||
243 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * Extended operations | ||
248 | */ | ||
249 | static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr) | ||
250 | { | ||
251 | vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm))); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr) | ||
256 | { | ||
257 | vfp_put_double(dd, vfp_get_double(dm)); | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr) | ||
262 | { | ||
263 | vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm))); | ||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr) /* dd = sqrt(dm); returns the exception flags raised */ | ||
268 | { | ||
269 | struct vfp_double vdm, vdd; | ||
270 | int ret, tm; | ||
271 | |||
272 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
273 | tm = vfp_double_type(&vdm); | ||
274 | if (tm & (VFP_NAN|VFP_INFINITY)) { | ||
275 | struct vfp_double *vdp = &vdd; | ||
276 | |||
277 | if (tm & VFP_NAN) | ||
278 | ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr); | ||
279 | else if (vdm.sign == 0) { | ||
280 | sqrt_copy: /* also reached from below for zero operands: result is the operand itself */ | ||
281 | vdp = &vdm; | ||
282 | ret = 0; | ||
283 | } else { | ||
284 | sqrt_invalid: /* also reached from below for negative operands: default NaN plus invalid operation */ | ||
285 | vdp = &vfp_double_default_qnan; | ||
286 | ret = FPSCR_IOC; | ||
287 | } | ||
288 | vfp_put_double(dd, vfp_double_pack(vdp)); | ||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * sqrt(+/- 0) == +/- 0 | ||
294 | */ | ||
295 | if (tm & VFP_ZERO) | ||
296 | goto sqrt_copy; | ||
297 | |||
298 | /* | ||
299 | * Normalise a denormalised number | ||
300 | */ | ||
301 | if (tm & VFP_DENORMAL) | ||
302 | vfp_double_normalise_denormal(&vdm); | ||
303 | |||
304 | /* | ||
305 | * sqrt(<0) = invalid | ||
306 | */ | ||
307 | if (vdm.sign) | ||
308 | goto sqrt_invalid; | ||
309 | |||
310 | vfp_double_dump("sqrt", &vdm); | ||
311 | |||
312 | /* | ||
313 | * Estimate the square root. | ||
314 | */ | ||
315 | vdd.sign = 0; | ||
316 | vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023; /* halve the unbiased exponent */ | ||
317 | vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31; | ||
318 | |||
319 | vfp_double_dump("sqrt estimate1", &vdd); | ||
320 | |||
321 | vdm.significand >>= 1 + (vdm.exponent & 1); /* one extra bit of shift when the exponent is odd */ | ||
322 | vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand); | ||
323 | |||
324 | vfp_double_dump("sqrt estimate2", &vdd); | ||
325 | |||
326 | /* | ||
327 | * And now adjust. | ||
328 | */ | ||
329 | if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) { | ||
330 | if (vdd.significand < 2) { | ||
331 | vdd.significand = ~0ULL; | ||
332 | } else { | ||
333 | u64 termh, terml, remh, reml; | ||
334 | vdm.significand <<= 2; | ||
335 | mul64to128(&termh, &terml, vdd.significand, vdd.significand); | ||
336 | sub128(&remh, &reml, vdm.significand, 0, termh, terml); /* remainder = operand - estimate^2 */ | ||
337 | while ((s64)remh < 0) { /* estimate too large: step it down and add back 2*estimate + 1 */ | ||
338 | vdd.significand -= 1; | ||
339 | shift64left(&termh, &terml, vdd.significand); | ||
340 | terml |= 1; | ||
341 | add128(&remh, &reml, remh, reml, termh, terml); | ||
342 | } | ||
343 | vdd.significand |= (remh | reml) != 0; /* sticky bit: a non-zero remainder means the root is inexact */ | ||
344 | } | ||
345 | } | ||
346 | vdd.significand = vfp_shiftright64jamming(vdd.significand, 1); | ||
347 | |||
348 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt"); | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * Equal := ZC | ||
353 | * Less than := N | ||
354 | * Greater than := C | ||
355 | * Unordered := CV | ||
356 | */ | ||
357 | static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr) | ||
358 | { | ||
359 | s64 d, m; | ||
360 | u32 ret = 0; | ||
361 | |||
362 | m = vfp_get_double(dm); | ||
363 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | ||
364 | ret |= FPSCR_C | FPSCR_V; | ||
365 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | ||
366 | /* | ||
367 | * Signalling NaN, or signalling on quiet NaN | ||
368 | */ | ||
369 | ret |= FPSCR_IOC; | ||
370 | } | ||
371 | |||
372 | d = vfp_get_double(dd); | ||
373 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | ||
374 | ret |= FPSCR_C | FPSCR_V; | ||
375 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | ||
376 | /* | ||
377 | * Signalling NaN, or signalling on quiet NaN | ||
378 | */ | ||
379 | ret |= FPSCR_IOC; | ||
380 | } | ||
381 | |||
382 | if (ret == 0) { | ||
383 | if (d == m || vfp_double_packed_abs(d | m) == 0) { | ||
384 | /* | ||
385 | * equal | ||
386 | */ | ||
387 | ret |= FPSCR_Z | FPSCR_C; | ||
388 | } else if (vfp_double_packed_sign(d ^ m)) { | ||
389 | /* | ||
390 | * different signs | ||
391 | */ | ||
392 | if (vfp_double_packed_sign(d)) | ||
393 | /* | ||
394 | * d is negative, so d < m | ||
395 | */ | ||
396 | ret |= FPSCR_N; | ||
397 | else | ||
398 | /* | ||
399 | * d is positive, so d > m | ||
400 | */ | ||
401 | ret |= FPSCR_C; | ||
402 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | ||
403 | /* | ||
404 | * d < m | ||
405 | */ | ||
406 | ret |= FPSCR_N; | ||
407 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | ||
408 | /* | ||
409 | * d > m | ||
410 | */ | ||
411 | ret |= FPSCR_C; | ||
412 | } | ||
413 | } | ||
414 | |||
415 | return ret; | ||
416 | } | ||
417 | |||
418 | static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr) | ||
419 | { | ||
420 | return vfp_compare(dd, 0, dm, fpscr); | ||
421 | } | ||
422 | |||
423 | static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr) | ||
424 | { | ||
425 | return vfp_compare(dd, 1, dm, fpscr); | ||
426 | } | ||
427 | |||
428 | static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr) | ||
429 | { | ||
430 | return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr); | ||
431 | } | ||
432 | |||
433 | static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr) | ||
434 | { | ||
435 | return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr); | ||
436 | } | ||
437 | |||
438 | static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr) | ||
439 | { | ||
440 | struct vfp_double vdm; | ||
441 | struct vfp_single vsd; | ||
442 | int tm; | ||
443 | u32 exceptions = 0; | ||
444 | |||
445 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
446 | |||
447 | tm = vfp_double_type(&vdm); | ||
448 | |||
449 | /* | ||
450 | * If we have a signalling NaN, signal invalid operation. | ||
451 | */ | ||
452 | if (tm == VFP_SNAN) | ||
453 | exceptions = FPSCR_IOC; | ||
454 | |||
455 | if (tm & VFP_DENORMAL) | ||
456 | vfp_double_normalise_denormal(&vdm); | ||
457 | |||
458 | vsd.sign = vdm.sign; | ||
459 | vsd.significand = vfp_hi64to32jamming(vdm.significand); | ||
460 | |||
461 | /* | ||
462 | * If we have an infinity or a NaN, the exponent must be 255 | ||
463 | */ | ||
464 | if (tm & (VFP_INFINITY|VFP_NAN)) { | ||
465 | vsd.exponent = 255; | ||
466 | if (tm & VFP_NAN) | ||
467 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | ||
468 | goto pack_nan; | ||
469 | } else if (tm & VFP_ZERO) | ||
470 | vsd.exponent = 0; | ||
471 | else | ||
472 | vsd.exponent = vdm.exponent - (1023 - 127); | ||
473 | |||
474 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts"); | ||
475 | |||
476 | pack_nan: | ||
477 | vfp_put_float(sd, vfp_single_pack(&vsd)); | ||
478 | return exceptions; | ||
479 | } | ||
480 | |||
481 | static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr) | ||
482 | { | ||
483 | struct vfp_double vdm; | ||
484 | u32 m = vfp_get_float(dm); | ||
485 | |||
486 | vdm.sign = 0; | ||
487 | vdm.exponent = 1023 + 63 - 1; | ||
488 | vdm.significand = (u64)m; | ||
489 | |||
490 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito"); | ||
491 | } | ||
492 | |||
493 | static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr) | ||
494 | { | ||
495 | struct vfp_double vdm; | ||
496 | u32 m = vfp_get_float(dm); | ||
497 | |||
498 | vdm.sign = (m & 0x80000000) >> 16; | ||
499 | vdm.exponent = 1023 + 63 - 1; | ||
500 | vdm.significand = vdm.sign ? -m : m; | ||
501 | |||
502 | return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito"); | ||
503 | } | ||
504 | |||
505 | static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr) | ||
506 | { | ||
507 | struct vfp_double vdm; | ||
508 | u32 d, exceptions = 0; | ||
509 | int rmode = fpscr & FPSCR_RMODE_MASK; | ||
510 | int tm; | ||
511 | |||
512 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
513 | |||
514 | /* | ||
515 | * Do we have a denormalised number? | ||
516 | */ | ||
517 | tm = vfp_double_type(&vdm); | ||
518 | if (tm & VFP_DENORMAL) | ||
519 | exceptions |= FPSCR_IDC; | ||
520 | |||
521 | if (tm & VFP_NAN) | ||
522 | vdm.sign = 0; | ||
523 | |||
524 | if (vdm.exponent >= 1023 + 32) { | ||
525 | d = vdm.sign ? 0 : 0xffffffff; | ||
526 | exceptions = FPSCR_IOC; | ||
527 | } else if (vdm.exponent >= 1023 - 1) { | ||
528 | int shift = 1023 + 63 - vdm.exponent; | ||
529 | u64 rem, incr = 0; | ||
530 | |||
531 | /* | ||
532 | * 2^0 <= m < 2^32-2^8 | ||
533 | */ | ||
534 | d = (vdm.significand << 1) >> shift; | ||
535 | rem = vdm.significand << (65 - shift); | ||
536 | |||
537 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
538 | incr = 0x8000000000000000ULL; | ||
539 | if ((d & 1) == 0) | ||
540 | incr -= 1; | ||
541 | } else if (rmode == FPSCR_ROUND_TOZERO) { | ||
542 | incr = 0; | ||
543 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | ||
544 | incr = ~0ULL; | ||
545 | } | ||
546 | |||
547 | if ((rem + incr) < rem) { | ||
548 | if (d < 0xffffffff) | ||
549 | d += 1; | ||
550 | else | ||
551 | exceptions |= FPSCR_IOC; | ||
552 | } | ||
553 | |||
554 | if (d && vdm.sign) { | ||
555 | d = 0; | ||
556 | exceptions |= FPSCR_IOC; | ||
557 | } else if (rem) | ||
558 | exceptions |= FPSCR_IXC; | ||
559 | } else { | ||
560 | d = 0; | ||
561 | if (vdm.exponent | vdm.significand) { | ||
562 | exceptions |= FPSCR_IXC; | ||
563 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | ||
564 | d = 1; | ||
565 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) { | ||
566 | d = 0; | ||
567 | exceptions |= FPSCR_IOC; | ||
568 | } | ||
569 | } | ||
570 | } | ||
571 | |||
572 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | ||
573 | |||
574 | vfp_put_float(sd, d); | ||
575 | |||
576 | return exceptions; | ||
577 | } | ||
578 | |||
579 | static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr) | ||
580 | { | ||
581 | return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO); | ||
582 | } | ||
583 | |||
584 | static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr) | ||
585 | { | ||
586 | struct vfp_double vdm; | ||
587 | u32 d, exceptions = 0; | ||
588 | int rmode = fpscr & FPSCR_RMODE_MASK; | ||
589 | |||
590 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
591 | vfp_double_dump("VDM", &vdm); | ||
592 | |||
593 | /* | ||
594 | * Do we have denormalised number? | ||
595 | */ | ||
596 | if (vfp_double_type(&vdm) & VFP_DENORMAL) | ||
597 | exceptions |= FPSCR_IDC; | ||
598 | |||
599 | if (vdm.exponent >= 1023 + 32) { | ||
600 | d = 0x7fffffff; | ||
601 | if (vdm.sign) | ||
602 | d = ~d; | ||
603 | exceptions |= FPSCR_IOC; | ||
604 | } else if (vdm.exponent >= 1023 - 1) { | ||
605 | int shift = 1023 + 63 - vdm.exponent; /* 58 */ | ||
606 | u64 rem, incr = 0; | ||
607 | |||
608 | d = (vdm.significand << 1) >> shift; | ||
609 | rem = vdm.significand << (65 - shift); | ||
610 | |||
611 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
612 | incr = 0x8000000000000000ULL; | ||
613 | if ((d & 1) == 0) | ||
614 | incr -= 1; | ||
615 | } else if (rmode == FPSCR_ROUND_TOZERO) { | ||
616 | incr = 0; | ||
617 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) { | ||
618 | incr = ~0ULL; | ||
619 | } | ||
620 | |||
621 | if ((rem + incr) < rem && d < 0xffffffff) | ||
622 | d += 1; | ||
623 | if (d > 0x7fffffff + (vdm.sign != 0)) { | ||
624 | d = 0x7fffffff + (vdm.sign != 0); | ||
625 | exceptions |= FPSCR_IOC; | ||
626 | } else if (rem) | ||
627 | exceptions |= FPSCR_IXC; | ||
628 | |||
629 | if (vdm.sign) | ||
630 | d = -d; | ||
631 | } else { | ||
632 | d = 0; | ||
633 | if (vdm.exponent | vdm.significand) { | ||
634 | exceptions |= FPSCR_IXC; | ||
635 | if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0) | ||
636 | d = 1; | ||
637 | else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) | ||
638 | d = -1; | ||
639 | } | ||
640 | } | ||
641 | |||
642 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | ||
643 | |||
644 | vfp_put_float(sd, (s32)d); | ||
645 | |||
646 | return exceptions; | ||
647 | } | ||
648 | |||
649 | static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr) | ||
650 | { | ||
651 | return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO); | ||
652 | } | ||
653 | |||
654 | |||
655 | static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = { | ||
656 | [FEXT_TO_IDX(FEXT_FCPY)] = vfp_double_fcpy, | ||
657 | [FEXT_TO_IDX(FEXT_FABS)] = vfp_double_fabs, | ||
658 | [FEXT_TO_IDX(FEXT_FNEG)] = vfp_double_fneg, | ||
659 | [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_double_fsqrt, | ||
660 | [FEXT_TO_IDX(FEXT_FCMP)] = vfp_double_fcmp, | ||
661 | [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_double_fcmpe, | ||
662 | [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_double_fcmpz, | ||
663 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_double_fcmpez, | ||
664 | [FEXT_TO_IDX(FEXT_FCVT)] = vfp_double_fcvts, | ||
665 | [FEXT_TO_IDX(FEXT_FUITO)] = vfp_double_fuito, | ||
666 | [FEXT_TO_IDX(FEXT_FSITO)] = vfp_double_fsito, | ||
667 | [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_double_ftoui, | ||
668 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_double_ftouiz, | ||
669 | [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_double_ftosi, | ||
670 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_double_ftosiz, | ||
671 | }; | ||
672 | |||
673 | |||
674 | |||
675 | |||
676 | static u32 | ||
677 | vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn, | ||
678 | struct vfp_double *vdm, u32 fpscr) | ||
679 | { | ||
680 | struct vfp_double *vdp; | ||
681 | u32 exceptions = 0; | ||
682 | int tn, tm; | ||
683 | |||
684 | tn = vfp_double_type(vdn); | ||
685 | tm = vfp_double_type(vdm); | ||
686 | |||
687 | if (tn & tm & VFP_INFINITY) { | ||
688 | /* | ||
689 | * Two infinities. Are they different signs? | ||
690 | */ | ||
691 | if (vdn->sign ^ vdm->sign) { | ||
692 | /* | ||
693 | * different signs -> invalid | ||
694 | */ | ||
695 | exceptions = FPSCR_IOC; | ||
696 | vdp = &vfp_double_default_qnan; | ||
697 | } else { | ||
698 | /* | ||
699 | * same signs -> valid | ||
700 | */ | ||
701 | vdp = vdn; | ||
702 | } | ||
703 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | ||
704 | /* | ||
705 | * One infinity and one number -> infinity | ||
706 | */ | ||
707 | vdp = vdn; | ||
708 | } else { | ||
709 | /* | ||
710 | * 'n' is a NaN of some type | ||
711 | */ | ||
712 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | ||
713 | } | ||
714 | *vdd = *vdp; | ||
715 | return exceptions; | ||
716 | } | ||
717 | |||
718 | static u32 | ||
719 | vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, | ||
720 | struct vfp_double *vdm, u32 fpscr) | ||
721 | { | ||
722 | u32 exp_diff; | ||
723 | u64 m_sig; | ||
724 | |||
725 | if (vdn->significand & (1ULL << 63) || | ||
726 | vdm->significand & (1ULL << 63)) { | ||
727 | pr_info("VFP: bad FP values in %s\n", __func__); | ||
728 | vfp_double_dump("VDN", vdn); | ||
729 | vfp_double_dump("VDM", vdm); | ||
730 | } | ||
731 | |||
732 | /* | ||
733 | * Ensure that 'n' is the largest magnitude number. Note that | ||
734 | * if 'n' and 'm' have equal exponents, we do not swap them. | ||
735 | * This ensures that NaN propagation works correctly. | ||
736 | */ | ||
737 | if (vdn->exponent < vdm->exponent) { | ||
738 | struct vfp_double *t = vdn; | ||
739 | vdn = vdm; | ||
740 | vdm = t; | ||
741 | } | ||
742 | |||
743 | /* | ||
744 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, | ||
745 | * infinity or a NaN here. | ||
746 | */ | ||
747 | if (vdn->exponent == 2047) | ||
748 | return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr); | ||
749 | |||
750 | /* | ||
751 | * We have two proper numbers, where 'vdn' is the larger magnitude. | ||
752 | * | ||
753 | * Copy 'n' to 'd' before doing the arithmetic. | ||
754 | */ | ||
755 | *vdd = *vdn; | ||
756 | |||
757 | /* | ||
758 | * Align 'm' with the result. | ||
759 | */ | ||
760 | exp_diff = vdn->exponent - vdm->exponent; | ||
761 | m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff); | ||
762 | |||
763 | /* | ||
764 | * If the signs are different, we are really subtracting. | ||
765 | */ | ||
766 | if (vdn->sign ^ vdm->sign) { | ||
767 | m_sig = vdn->significand - m_sig; | ||
768 | if ((s64)m_sig < 0) { | ||
769 | vdd->sign = vfp_sign_negate(vdd->sign); | ||
770 | m_sig = -m_sig; | ||
771 | } | ||
772 | } else { | ||
773 | m_sig += vdn->significand; | ||
774 | } | ||
775 | vdd->significand = m_sig; | ||
776 | |||
777 | return 0; | ||
778 | } | ||
779 | |||
780 | static u32 | ||
781 | vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, | ||
782 | struct vfp_double *vdm, u32 fpscr) | ||
783 | { | ||
784 | vfp_double_dump("VDN", vdn); | ||
785 | vfp_double_dump("VDM", vdm); | ||
786 | |||
787 | /* | ||
788 | * Ensure that 'n' is the largest magnitude number. Note that | ||
789 | * if 'n' and 'm' have equal exponents, we do not swap them. | ||
790 | * This ensures that NaN propagation works correctly. | ||
791 | */ | ||
792 | if (vdn->exponent < vdm->exponent) { | ||
793 | struct vfp_double *t = vdn; | ||
794 | vdn = vdm; | ||
795 | vdm = t; | ||
796 | pr_debug("VFP: swapping M <-> N\n"); | ||
797 | } | ||
798 | |||
799 | vdd->sign = vdn->sign ^ vdm->sign; | ||
800 | |||
801 | /* | ||
802 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. | ||
803 | */ | ||
804 | if (vdn->exponent == 2047) { | ||
805 | if (vdn->significand || (vdm->exponent == 2047 && vdm->significand)) | ||
806 | return vfp_propagate_nan(vdd, vdn, vdm, fpscr); | ||
807 | if ((vdm->exponent | vdm->significand) == 0) { | ||
808 | *vdd = vfp_double_default_qnan; | ||
809 | return FPSCR_IOC; | ||
810 | } | ||
811 | vdd->exponent = vdn->exponent; | ||
812 | vdd->significand = 0; | ||
813 | return 0; | ||
814 | } | ||
815 | |||
816 | /* | ||
817 | * If 'm' is zero, the result is always zero. In this case, | ||
818 | * 'n' may be zero or a number, but it doesn't matter which. | ||
819 | */ | ||
820 | if ((vdm->exponent | vdm->significand) == 0) { | ||
821 | vdd->exponent = 0; | ||
822 | vdd->significand = 0; | ||
823 | return 0; | ||
824 | } | ||
825 | |||
826 | /* | ||
827 | * We add 2 to the destination exponent for the same reason | ||
828 | * as the addition case - though this time we have +1 from | ||
829 | * each input operand. | ||
830 | */ | ||
831 | vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2; | ||
832 | vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand); | ||
833 | |||
834 | vfp_double_dump("VDD", vdd); | ||
835 | return 0; | ||
836 | } | ||
837 | |||
838 | #define NEG_MULTIPLY (1 << 0) | ||
839 | #define NEG_SUBTRACT (1 << 1) | ||
840 | |||
841 | static u32 | ||
842 | vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func) | ||
843 | { | ||
844 | struct vfp_double vdd, vdp, vdn, vdm; | ||
845 | u32 exceptions; | ||
846 | |||
847 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | ||
848 | if (vdn.exponent == 0 && vdn.significand) | ||
849 | vfp_double_normalise_denormal(&vdn); | ||
850 | |||
851 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
852 | if (vdm.exponent == 0 && vdm.significand) | ||
853 | vfp_double_normalise_denormal(&vdm); | ||
854 | |||
855 | exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr); | ||
856 | if (negate & NEG_MULTIPLY) | ||
857 | vdp.sign = vfp_sign_negate(vdp.sign); | ||
858 | |||
859 | vfp_double_unpack(&vdn, vfp_get_double(dd)); | ||
860 | if (negate & NEG_SUBTRACT) | ||
861 | vdn.sign = vfp_sign_negate(vdn.sign); | ||
862 | |||
863 | exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr); | ||
864 | |||
865 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func); | ||
866 | } | ||
867 | |||
868 | /* | ||
869 | * Standard operations | ||
870 | */ | ||
871 | |||
872 | /* | ||
873 | * sd = sd + (sn * sm) | ||
874 | */ | ||
875 | static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr) | ||
876 | { | ||
877 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac"); | ||
878 | } | ||
879 | |||
880 | /* | ||
881 | * sd = sd - (sn * sm) | ||
882 | */ | ||
883 | static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr) | ||
884 | { | ||
885 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac"); | ||
886 | } | ||
887 | |||
888 | /* | ||
889 | * sd = -sd + (sn * sm) | ||
890 | */ | ||
891 | static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr) | ||
892 | { | ||
893 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc"); | ||
894 | } | ||
895 | |||
896 | /* | ||
897 | * sd = -sd - (sn * sm) | ||
898 | */ | ||
899 | static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr) | ||
900 | { | ||
901 | return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | ||
902 | } | ||
903 | |||
904 | /* | ||
905 | * sd = sn * sm | ||
906 | */ | ||
907 | static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr) | ||
908 | { | ||
909 | struct vfp_double vdd, vdn, vdm; | ||
910 | u32 exceptions; | ||
911 | |||
912 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | ||
913 | if (vdn.exponent == 0 && vdn.significand) | ||
914 | vfp_double_normalise_denormal(&vdn); | ||
915 | |||
916 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
917 | if (vdm.exponent == 0 && vdm.significand) | ||
918 | vfp_double_normalise_denormal(&vdm); | ||
919 | |||
920 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | ||
921 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul"); | ||
922 | } | ||
923 | |||
924 | /* | ||
925 | * sd = -(sn * sm) | ||
926 | */ | ||
927 | static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr) | ||
928 | { | ||
929 | struct vfp_double vdd, vdn, vdm; | ||
930 | u32 exceptions; | ||
931 | |||
932 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | ||
933 | if (vdn.exponent == 0 && vdn.significand) | ||
934 | vfp_double_normalise_denormal(&vdn); | ||
935 | |||
936 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
937 | if (vdm.exponent == 0 && vdm.significand) | ||
938 | vfp_double_normalise_denormal(&vdm); | ||
939 | |||
940 | exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr); | ||
941 | vdd.sign = vfp_sign_negate(vdd.sign); | ||
942 | |||
943 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul"); | ||
944 | } | ||
945 | |||
946 | /* | ||
947 | * sd = sn + sm | ||
948 | */ | ||
949 | static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr) | ||
950 | { | ||
951 | struct vfp_double vdd, vdn, vdm; | ||
952 | u32 exceptions; | ||
953 | |||
954 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | ||
955 | if (vdn.exponent == 0 && vdn.significand) | ||
956 | vfp_double_normalise_denormal(&vdn); | ||
957 | |||
958 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
959 | if (vdm.exponent == 0 && vdm.significand) | ||
960 | vfp_double_normalise_denormal(&vdm); | ||
961 | |||
962 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | ||
963 | |||
964 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd"); | ||
965 | } | ||
966 | |||
967 | /* | ||
968 | * sd = sn - sm | ||
969 | */ | ||
970 | static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr) | ||
971 | { | ||
972 | struct vfp_double vdd, vdn, vdm; | ||
973 | u32 exceptions; | ||
974 | |||
975 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | ||
976 | if (vdn.exponent == 0 && vdn.significand) | ||
977 | vfp_double_normalise_denormal(&vdn); | ||
978 | |||
979 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
980 | if (vdm.exponent == 0 && vdm.significand) | ||
981 | vfp_double_normalise_denormal(&vdm); | ||
982 | |||
983 | /* | ||
984 | * Subtraction is like addition, but with a negated operand. | ||
985 | */ | ||
986 | vdm.sign = vfp_sign_negate(vdm.sign); | ||
987 | |||
988 | exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr); | ||
989 | |||
990 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub"); | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * sd = sn / sm | ||
995 | */ | ||
996 | static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr) | ||
997 | { | ||
998 | struct vfp_double vdd, vdn, vdm; | ||
999 | u32 exceptions = 0; | ||
1000 | int tm, tn; | ||
1001 | |||
1002 | vfp_double_unpack(&vdn, vfp_get_double(dn)); | ||
1003 | vfp_double_unpack(&vdm, vfp_get_double(dm)); | ||
1004 | |||
1005 | vdd.sign = vdn.sign ^ vdm.sign; | ||
1006 | |||
1007 | tn = vfp_double_type(&vdn); | ||
1008 | tm = vfp_double_type(&vdm); | ||
1009 | |||
1010 | /* | ||
1011 | * Is n a NAN? | ||
1012 | */ | ||
1013 | if (tn & VFP_NAN) | ||
1014 | goto vdn_nan; | ||
1015 | |||
1016 | /* | ||
1017 | * Is m a NAN? | ||
1018 | */ | ||
1019 | if (tm & VFP_NAN) | ||
1020 | goto vdm_nan; | ||
1021 | |||
1022 | /* | ||
1023 | * If n and m are infinity, the result is invalid | ||
1024 | * If n and m are zero, the result is invalid | ||
1025 | */ | ||
1026 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | ||
1027 | goto invalid; | ||
1028 | |||
1029 | /* | ||
1030 | * If n is infinity, the result is infinity | ||
1031 | */ | ||
1032 | if (tn & VFP_INFINITY) | ||
1033 | goto infinity; | ||
1034 | |||
1035 | /* | ||
1036 | * If m is zero, raise div0 exceptions | ||
1037 | */ | ||
1038 | if (tm & VFP_ZERO) | ||
1039 | goto divzero; | ||
1040 | |||
1041 | /* | ||
1042 | * If m is infinity, or n is zero, the result is zero | ||
1043 | */ | ||
1044 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | ||
1045 | goto zero; | ||
1046 | |||
1047 | if (tn & VFP_DENORMAL) | ||
1048 | vfp_double_normalise_denormal(&vdn); | ||
1049 | if (tm & VFP_DENORMAL) | ||
1050 | vfp_double_normalise_denormal(&vdm); | ||
1051 | |||
1052 | /* | ||
1053 | * Ok, we have two numbers, we can perform division. | ||
1054 | */ | ||
1055 | vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1; | ||
1056 | vdm.significand <<= 1; | ||
1057 | if (vdm.significand <= (2 * vdn.significand)) { | ||
1058 | vdn.significand >>= 1; | ||
1059 | vdd.exponent++; | ||
1060 | } | ||
1061 | vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand); | ||
1062 | if ((vdd.significand & 0x1ff) <= 2) { | ||
1063 | u64 termh, terml, remh, reml; | ||
1064 | mul64to128(&termh, &terml, vdm.significand, vdd.significand); | ||
1065 | sub128(&remh, &reml, vdn.significand, 0, termh, terml); | ||
1066 | while ((s64)remh < 0) { | ||
1067 | vdd.significand -= 1; | ||
1068 | add128(&remh, &reml, remh, reml, 0, vdm.significand); | ||
1069 | } | ||
1070 | vdd.significand |= (reml != 0); | ||
1071 | } | ||
1072 | return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv"); | ||
1073 | |||
1074 | vdn_nan: | ||
1075 | exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr); | ||
1076 | pack: | ||
1077 | vfp_put_double(dd, vfp_double_pack(&vdd)); | ||
1078 | return exceptions; | ||
1079 | |||
1080 | vdm_nan: | ||
1081 | exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr); | ||
1082 | goto pack; | ||
1083 | |||
1084 | zero: | ||
1085 | vdd.exponent = 0; | ||
1086 | vdd.significand = 0; | ||
1087 | goto pack; | ||
1088 | |||
1089 | divzero: | ||
1090 | exceptions = FPSCR_DZC; | ||
1091 | infinity: | ||
1092 | vdd.exponent = 2047; | ||
1093 | vdd.significand = 0; | ||
1094 | goto pack; | ||
1095 | |||
1096 | invalid: | ||
1097 | vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan)); | ||
1098 | return FPSCR_IOC; | ||
1099 | } | ||
1100 | |||
1101 | static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = { | ||
1102 | [FOP_TO_IDX(FOP_FMAC)] = vfp_double_fmac, | ||
1103 | [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac, | ||
1104 | [FOP_TO_IDX(FOP_FMSC)] = vfp_double_fmsc, | ||
1105 | [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc, | ||
1106 | [FOP_TO_IDX(FOP_FMUL)] = vfp_double_fmul, | ||
1107 | [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul, | ||
1108 | [FOP_TO_IDX(FOP_FADD)] = vfp_double_fadd, | ||
1109 | [FOP_TO_IDX(FOP_FSUB)] = vfp_double_fsub, | ||
1110 | [FOP_TO_IDX(FOP_FDIV)] = vfp_double_fdiv, | ||
1111 | }; | ||
1112 | |||
1113 | #define FREG_BANK(x) ((x) & 0x0c) | ||
1114 | #define FREG_IDX(x) ((x) & 3) | ||
1115 | |||
1116 | u32 vfp_double_cpdo(u32 inst, u32 fpscr) | ||
1117 | { | ||
1118 | u32 op = inst & FOP_MASK; | ||
1119 | u32 exceptions = 0; | ||
1120 | unsigned int dd = vfp_get_sd(inst); | ||
1121 | unsigned int dn = vfp_get_sn(inst); | ||
1122 | unsigned int dm = vfp_get_sm(inst); | ||
1123 | unsigned int vecitr, veclen, vecstride; | ||
1124 | u32 (*fop)(int, int, s32, u32); | ||
1125 | |||
1126 | veclen = fpscr & FPSCR_LENGTH_MASK; | ||
1127 | vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2; | ||
1128 | |||
1129 | /* | ||
1130 | * If destination bank is zero, vector length is always '1'. | ||
1131 | * ARM DDI0100F C5.1.3, C5.3.2. | ||
1132 | */ | ||
1133 | if (FREG_BANK(dd) == 0) | ||
1134 | veclen = 0; | ||
1135 | |||
1136 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | ||
1137 | (veclen >> FPSCR_LENGTH_BIT) + 1); | ||
1138 | |||
1139 | fop = (op == FOP_EXT) ? fop_extfns[dn] : fop_fns[FOP_TO_IDX(op)]; | ||
1140 | if (!fop) | ||
1141 | goto invalid; | ||
1142 | |||
1143 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | ||
1144 | u32 except; | ||
1145 | |||
1146 | if (op == FOP_EXT) | ||
1147 | pr_debug("VFP: itr%d (d%u.%u) = op[%u] (d%u.%u)\n", | ||
1148 | vecitr >> FPSCR_LENGTH_BIT, | ||
1149 | dd >> 1, dd & 1, dn, | ||
1150 | dm >> 1, dm & 1); | ||
1151 | else | ||
1152 | pr_debug("VFP: itr%d (d%u.%u) = (d%u.%u) op[%u] (d%u.%u)\n", | ||
1153 | vecitr >> FPSCR_LENGTH_BIT, | ||
1154 | dd >> 1, dd & 1, | ||
1155 | dn >> 1, dn & 1, | ||
1156 | FOP_TO_IDX(op), | ||
1157 | dm >> 1, dm & 1); | ||
1158 | |||
1159 | except = fop(dd, dn, dm, fpscr); | ||
1160 | pr_debug("VFP: itr%d: exceptions=%08x\n", | ||
1161 | vecitr >> FPSCR_LENGTH_BIT, except); | ||
1162 | |||
1163 | exceptions |= except; | ||
1164 | |||
1165 | /* | ||
1166 | * This ensures that comparisons only operate on scalars; | ||
1167 | * comparisons always return with one FPSCR status bit set. | ||
1168 | */ | ||
1169 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | ||
1170 | break; | ||
1171 | |||
1172 | /* | ||
1173 | * CHECK: It appears to be undefined whether we stop when | ||
1174 | * we encounter an exception. We continue. | ||
1175 | */ | ||
1176 | |||
1177 | dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6); | ||
1178 | dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6); | ||
1179 | if (FREG_BANK(dm) != 0) | ||
1180 | dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6); | ||
1181 | } | ||
1182 | return exceptions; | ||
1183 | |||
1184 | invalid: | ||
1185 | return ~0; | ||
1186 | } | ||
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S new file mode 100644 index 000000000000..de4ca1223c58 --- /dev/null +++ b/arch/arm/vfp/vfphw.S | |||
@@ -0,0 +1,215 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/vfp/vfphw.S | ||
3 | * | ||
4 | * Copyright (C) 2004 ARM Limited. | ||
5 | * Written by Deep Blue Solutions Limited. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This code is called from the kernel's undefined instruction trap. | ||
12 | * r9 holds the return address for successful handling. | ||
13 | * lr holds the return address for unrecognised instructions. | ||
14 | * r10 points at the start of the private FP workspace in the thread structure | ||
15 | * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h) | ||
16 | */ | ||
17 | #include <asm/thread_info.h> | ||
18 | #include <asm/vfpmacros.h> | ||
19 | #include "../kernel/entry-header.S" | ||
20 | |||
@ Debug print helpers; they expand to nothing unless DEBUG is defined.
@ Each one saves r0-r3, ip and lr, points r0 at a format string that is
@ assembled inline right after the branch, calls printk, then branches
@ over the string and restores the saved registers.
@ "add r0, pc, #4" lands on the string because pc reads as the address
@ of the add plus 8, and the bl and b that follow take 4 bytes each.
@ DBGSTR passes no argument, DBGSTR1 one (in r1), DBGSTR3 three (r1-r3).
21 | .macro DBGSTR, str | ||
22 | #ifdef DEBUG | ||
23 | stmfd sp!, {r0-r3, ip, lr} | ||
24 | add r0, pc, #4 | ||
25 | bl printk | ||
26 | b 1f | ||
27 | .asciz "<7>VFP: \str\n" | ||
28 | .balign 4 | ||
29 | 1: ldmfd sp!, {r0-r3, ip, lr} | ||
30 | #endif | ||
31 | .endm | ||
32 | |||
33 | .macro DBGSTR1, str, arg | ||
34 | #ifdef DEBUG | ||
35 | stmfd sp!, {r0-r3, ip, lr} | ||
36 | mov r1, \arg | ||
37 | add r0, pc, #4 | ||
38 | bl printk | ||
39 | b 1f | ||
40 | .asciz "<7>VFP: \str\n" | ||
41 | .balign 4 | ||
42 | 1: ldmfd sp!, {r0-r3, ip, lr} | ||
43 | #endif | ||
44 | .endm | ||
45 | |||
46 | .macro DBGSTR3, str, arg1, arg2, arg3 | ||
47 | #ifdef DEBUG | ||
48 | stmfd sp!, {r0-r3, ip, lr} | ||
49 | mov r3, \arg3 | ||
50 | mov r2, \arg2 | ||
51 | mov r1, \arg1 | ||
52 | add r0, pc, #4 | ||
53 | bl printk | ||
54 | b 1f | ||
55 | .asciz "<7>VFP: \str\n" | ||
56 | .balign 4 | ||
57 | 1: ldmfd sp!, {r0-r3, ip, lr} | ||
58 | #endif | ||
59 | .endm | ||
60 | |||
61 | |||
62 | @ VFP hardware support entry point. | ||
63 | @ | ||
64 | @ r0 = faulted instruction | ||
65 | @ r2 = faulted PC+4 | ||
66 | @ r9 = successful return | ||
67 | @ r10 = vfp_state union | ||
68 | @ lr = failure return | ||
@
@ Outline of the code below:
@  - VFP disabled: enable it, switch the register contents to the state
@    at r10 if last_VFP_context differs, then either retry the faulted
@    instruction or go on to handle a pending exception.
@  - VFP already enabled: bounce to VFP9_bounce if an exception is
@    pending or FPSCR_IXE is set, otherwise return through lr.
69 | |||
70 | .globl vfp_support_entry | ||
71 | vfp_support_entry: | ||
72 | DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 | ||
73 | |||
74 | VFPFMRX r1, FPEXC @ Is the VFP enabled? | ||
75 | DBGSTR1 "fpexc %08x", r1 | ||
76 | tst r1, #FPEXC_ENABLE | ||
77 | bne look_for_VFP_exceptions @ VFP is already enabled | ||
78 | |||
79 | DBGSTR1 "enable %x", r10 | ||
80 | ldr r3, last_VFP_context_address | ||
81 | orr r1, r1, #FPEXC_ENABLE @ user FPEXC has the enable bit set | ||
82 | ldr r4, [r3] @ last_VFP_context pointer | ||
83 | bic r5, r1, #FPEXC_EXCEPTION @ make sure exceptions are disabled | ||
84 | cmp r4, r10 | ||
85 | beq check_for_exception @ we are returning to the same | ||
86 | @ process, so the registers are | ||
87 | @ still there. In this case, we do | ||
88 | @ not want to drop a pending exception. | ||
89 | |||
90 | VFPFMXR FPEXC, r5 @ enable VFP, disable any pending | ||
91 | @ exceptions, so we can get at the | ||
92 | @ rest of it | ||
93 | |||
94 | @ Save out the current registers to the old thread state | ||
95 | |||
96 | DBGSTR1 "save old state %p", r4 | ||
97 | cmp r4, #0 | ||
98 | beq no_old_VFP_process | ||
99 | VFPFMRX r5, FPSCR @ current status | ||
100 | VFPFMRX r6, FPINST @ FPINST (always there, rev0 onwards) | ||
101 | tst r1, #FPEXC_FPV2 @ is there an FPINST2 to read? | ||
102 | VFPFMRX r8, FPINST2, NE @ FPINST2 if needed - avoids reading | ||
103 | @ nonexistent reg on rev0 | ||
104 | VFPFSTMIA r4 @ save the working registers | ||
@ The control registers are stored 8*16+4 bytes past the start of the
@ state - presumably after 16 double registers plus one word; confirm
@ against the vfp_state layout.
105 | add r4, r4, #8*16+4 | ||
106 | stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 | ||
107 | @ and point r4 at the word at the | ||
108 | @ start of the register dump | ||
109 | |||
110 | no_old_VFP_process: | ||
111 | DBGSTR1 "load state %p", r10 | ||
112 | str r10, [r3] @ update the last_VFP_context pointer | ||
113 | @ Load the saved state back into the VFP | ||
114 | add r4, r10, #8*16+4 | ||
115 | ldmia r4, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 | ||
116 | VFPFLDMIA r10 @ reload the working registers while | ||
117 | @ FPEXC is in a safe state | ||
118 | tst r1, #FPEXC_FPV2 @ is there an FPINST2 to write? | ||
119 | VFPFMXR FPINST2, r8, NE @ FPINST2 if needed - avoids writing | ||
120 | @ nonexistent reg on rev0 | ||
121 | VFPFMXR FPINST, r6 | ||
122 | VFPFMXR FPSCR, r5 @ restore status | ||
123 | |||
124 | check_for_exception: | ||
125 | tst r1, #FPEXC_EXCEPTION | ||
126 | bne process_exception @ might as well handle the pending | ||
127 | @ exception before retrying branch | ||
128 | @ out before setting an FPEXC that | ||
129 | @ stops us reading stuff | ||
130 | VFPFMXR FPEXC, r1 @ restore FPEXC last | ||
@ r2 is the faulted PC+4, so r2 - 4 is the faulted instruction itself.
131 | sub r2, r2, #4 | ||
132 | str r2, [sp, #S_PC] @ retry the instruction | ||
133 | mov pc, r9 @ we think we have handled things | ||
134 | |||
135 | |||
136 | look_for_VFP_exceptions: | ||
137 | tst r1, #FPEXC_EXCEPTION | ||
138 | bne process_exception | ||
139 | VFPFMRX r5, FPSCR | ||
140 | tst r5, #FPSCR_IXE @ IXE doesn't set FPEXC_EXCEPTION ! | ||
141 | bne process_exception | ||
142 | |||
143 | @ Fall into hand on to next handler - appropriate coproc instr | ||
144 | @ not recognised by VFP | ||
145 | |||
146 | DBGSTR "not VFP" | ||
147 | mov pc, lr | ||
148 | |||
149 | process_exception: | ||
150 | DBGSTR "bounce" | ||
151 | sub r2, r2, #4 | ||
152 | str r2, [sp, #S_PC] @ retry the instruction on exit from | ||
153 | @ the imprecise exception handling in | ||
154 | @ the support code | ||
155 | mov r2, sp @ nothing stacked - regdump is at TOS | ||
156 | mov lr, r9 @ setup for a return to the user code. | ||
157 | |||
158 | @ Now call the C code to package up the bounce to the support code | ||
159 | @ r0 holds the trigger instruction | ||
160 | @ r1 holds the FPEXC value | ||
161 | @ r2 pointer to register dump | ||
162 | b VFP9_bounce @ we have handled this - the support | ||
163 | @ code will raise an exception if | ||
164 | @ required. If not, the user code will | ||
165 | @ retry the faulted instruction | ||
166 | |||
@ Literal holding the address of last_VFP_context, loaded with a
@ pc-relative ldr near the top of vfp_support_entry.
167 | last_VFP_context_address: | ||
168 | .word last_VFP_context | ||
169 | |||
@ vfp_get_float: return in r0 the contents of the single register whose
@ number is passed in r0.
@ Implemented as a computed jump into a table of 8-byte entries (one
@ mrc plus a return); pc reads as the add plus 8, so index 0 skips the
@ "mov r0, r0" filler and lands on the first entry.
170 | .globl vfp_get_float | ||
171 | vfp_get_float: | ||
172 | add pc, pc, r0, lsl #3 | ||
173 | mov r0, r0 | ||
174 | .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 | ||
175 | mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s<2*dr> | ||
176 | mov pc, lr | ||
177 | mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s<2*dr+1> | ||
178 | mov pc, lr | ||
179 | .endr | ||
180 | |||
@ vfp_put_float: write the value in r1 to the single register whose
@ number is passed in r0.
@ Implemented as a computed jump into a table of 8-byte entries (one
@ mcr plus a return); pc reads as the add plus 8, so index 0 skips the
@ "mov r0, r0" filler and lands on the first entry.
181 | .globl vfp_put_float | ||
182 | vfp_put_float: | ||
183 | add pc, pc, r0, lsl #3 | ||
184 | mov r0, r0 | ||
185 | .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 | ||
186 | mcr p10, 0, r1, c\dr, c0, 0 @ fmsr s<2*dr>, r1 | ||
187 | mov pc, lr | ||
188 | mcr p10, 0, r1, c\dr, c0, 4 @ fmsr s<2*dr+1>, r1 | ||
189 | mov pc, lr | ||
190 | .endr | ||
191 | |||
@ vfp_get_double: return in r0/r1 the contents of a double register.
@ The register number in r0 is halved first, then used for a computed
@ jump into a table of 8-byte entries (one mrrc plus a return).
@ Index 16 falls just past the table, onto the code that returns zero.
192 | .globl vfp_get_double | ||
193 | vfp_get_double: | ||
194 | mov r0, r0, lsr #1 | ||
195 | add pc, pc, r0, lsl #3 | ||
196 | mov r0, r0 | ||
197 | .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 | ||
198 | mrrc p10, 1, r0, r1, c\dr @ fmrrd r0, r1, d\dr | ||
199 | mov pc, lr | ||
200 | .endr | ||
201 | |||
202 | @ virtual register 16 for compare with zero | ||
203 | mov r0, #0 | ||
204 | mov r1, #0 | ||
205 | mov pc, lr | ||
206 | |||
@ vfp_put_double: write the 64-bit value in r1/r2 to a double register.
@ The register number in r0 is halved first, then used for a computed
@ jump into a table of 8-byte entries (one mcrr plus a return).
@ NOTE(review): taking the value from r1/r2 assumes the calling
@ convention passes a 64-bit second argument in that pair - confirm
@ against the ABI the kernel is built with.
207 | .globl vfp_put_double | ||
208 | vfp_put_double: | ||
209 | mov r0, r0, lsr #1 | ||
210 | add pc, pc, r0, lsl #3 | ||
211 | mov r0, r0 | ||
212 | .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 | ||
213 | mcrr p10, 1, r1, r2, c\dr @ fmdrr d\dr, r1, r2 | ||
214 | mov pc, lr | ||
215 | .endr | ||
diff --git a/arch/arm/vfp/vfpinstr.h b/arch/arm/vfp/vfpinstr.h new file mode 100644 index 000000000000..6c819aeae006 --- /dev/null +++ b/arch/arm/vfp/vfpinstr.h | |||
@@ -0,0 +1,88 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/vfp/vfpinstr.h | ||
3 | * | ||
4 | * Copyright (C) 2004 ARM Limited. | ||
5 | * Written by Deep Blue Solutions Limited. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * VFP instruction masks. | ||
12 | */ | ||
/*
 * Coprocessor instruction field tests and extractors.
 *
 * INST_CPRTDO  - bits 27:24 are 0b1110 (CPDO or CPRT encoding)
 * INST_CPRT    - bit 4 (non-zero for a register transfer)
 * INST_CPRT_L  - bit 20
 * INST_CPRT_Rd - bits 15:12
 * INST_CPRT_OP - bits 23:21
 * INST_CPNUM   - bits 11:8, left in place (compare against CPNUM(n))
 */
#define INST_CPRTDO(inst)	((((inst) >> 24) & 0x0f) == 0x0e)
#define INST_CPRT(inst)		((inst) & 0x00000010)
#define INST_CPRT_L(inst)	((inst) & 0x00100000)
#define INST_CPRT_Rd(inst)	(((inst) >> 12) & 15)
#define INST_CPRT_OP(inst)	(((inst) & (7 << 21)) >> 21)
#define INST_CPNUM(inst)	((inst) & (0xf << 8))
#define CPNUM(cp)		((cp) << 8)
20 | |||
/*
 * Primary opcode field of a CPDO instruction: bits 23, 21, 20 and 6.
 * FOP_EXT (all four set) selects the extended operations described by
 * the FEXT_* values.
 */
#define FOP_MASK	(0x00b00040)
#define FOP_FMAC	(0x00000000)
#define FOP_FNMAC	(0x00000040)
#define FOP_FMSC	(0x00100000)
#define FOP_FNMSC	(0x00100040)
#define FOP_FMUL	(0x00200000)
#define FOP_FNMUL	(0x00200040)
#define FOP_FADD	(0x00300000)
#define FOP_FSUB	(0x00300040)
#define FOP_FDIV	(0x00800000)
#define FOP_EXT		(0x00b00040)

/*
 * Compress the opcode bits into a small table index: bits 23, 21 and 20
 * move down to bits 3, 1 and 0, and bit 6 becomes bit 2.  The argument
 * is parenthesised so that an expression such as (a | b) is decoded as
 * a whole rather than being split by operator precedence.
 */
#define FOP_TO_IDX(inst)	((((inst) & 0x00b00000) >> 20) | (((inst) & (1 << 6)) >> 4))
34 | |||
/*
 * Extended operation field (used when the primary opcode is FOP_EXT):
 * bits 19:16 and bit 7.
 */
#define FEXT_MASK	(0x000f0080)
#define FEXT_FCPY	(0x00000000)
#define FEXT_FABS	(0x00000080)
#define FEXT_FNEG	(0x00010000)
#define FEXT_FSQRT	(0x00010080)
#define FEXT_FCMP	(0x00040000)
#define FEXT_FCMPE	(0x00040080)
#define FEXT_FCMPZ	(0x00050000)
#define FEXT_FCMPEZ	(0x00050080)
#define FEXT_FCVT	(0x00070080)
#define FEXT_FUITO	(0x00080000)
#define FEXT_FSITO	(0x00080080)
#define FEXT_FTOUI	(0x000c0000)
#define FEXT_FTOUIZ	(0x000c0080)
#define FEXT_FTOSI	(0x000d0000)
#define FEXT_FTOSIZ	(0x000d0080)

/*
 * Compress the extended opcode into a table index: bits 19:16 become
 * bits 4:1 and bit 7 becomes bit 0.  The argument is parenthesised so
 * that compound expressions are decoded as a whole.
 */
#define FEXT_TO_IDX(inst)	((((inst) & 0x000f0000) >> 15) | (((inst) & (1 << 7)) >> 7))
53 | |||
/*
 * Register number extraction.
 *
 * Single precision register numbers are five bits wide: a four bit
 * field supplies bits 4:1 and a separate instruction bit supplies
 * bit 0 (bit 22 for Sd, bit 5 for Sm, bit 7 for Sn).  Double precision
 * register numbers are just the four bit field.
 *
 * Every use of the argument is parenthesised so that compound
 * expressions are decoded as a whole.
 */
#define vfp_get_sd(inst)	((((inst) & 0x0000f000) >> 11) | (((inst) & (1 << 22)) >> 22))
#define vfp_get_dd(inst)	(((inst) & 0x0000f000) >> 12)
#define vfp_get_sm(inst)	((((inst) & 0x0000000f) << 1) | (((inst) & (1 << 5)) >> 5))
#define vfp_get_dm(inst)	((inst) & 0x0000000f)
#define vfp_get_sn(inst)	((((inst) & 0x000f0000) >> 15) | (((inst) & (1 << 7)) >> 7))
#define vfp_get_dn(inst)	(((inst) & 0x000f0000) >> 16)

/* Non-zero when the coprocessor number field (bits 11:8) is 10. */
#define vfp_single(inst)	(((inst) & 0x0000f00) == 0xa00)
62 | |||
/*
 * FPSCR condition flags (bits 31:28).
 *
 * FPSCR_N uses an unsigned constant: shifting a signed 1 into bit 31
 * overflows int, which the C standard leaves undefined.
 */
#define FPSCR_N	(1U << 31)
#define FPSCR_Z	(1 << 30)
#define FPSCR_C	(1 << 29)
#define FPSCR_V	(1 << 28)
67 | |||
68 | /* | ||
69 | * Since we aren't building with -mfpu=vfp, we need to code | ||
70 | * these instructions using their MRC/MCR equivalents. | ||
71 | */ | ||
/*
 * vfpreg() turns its argument into a string literal.  When it is used
 * from fmrx()/fmxr() below, the register name passed to those macros is
 * macro-expanded first (it is not itself an operand of '#' there), so
 * the string pasted into the instruction is the expansion of the name,
 * while the plain #_vfp_ used in the assembler comment keeps the name
 * as written.
 */
72 | #define vfpreg(_vfp_) #_vfp_ | ||
73 | |||
/*
 * fmrx(reg): read a VFP system register with "mrc p10, 7" and yield the
 * value as a u32 (GCC statement expression).
 */
74 | #define fmrx(_vfp_) ({ \ | ||
75 | u32 __v; \ | ||
76 | asm("mrc%? p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmrx %0, " #_vfp_ \ | ||
77 | : "=r" (__v)); \ | ||
78 | __v; \ | ||
79 | }) | ||
80 | |||
/*
 * fmxr(reg, value): write 'value' to a VFP system register with
 * "mcr p10, 7".
 */
81 | #define fmxr(_vfp_,_var_) \ | ||
82 | asm("mcr%? p10, 7, %0, " vfpreg(_vfp_) ", cr0, 0 @ fmxr " #_vfp_ ", %0" \ | ||
83 | : : "r" (_var_)) | ||
84 | |||
85 | u32 vfp_single_cpdo(u32 inst, u32 fpscr); | ||
86 | u32 vfp_single_cprt(u32 inst, u32 fpscr, struct pt_regs *regs); | ||
87 | |||
88 | u32 vfp_double_cpdo(u32 inst, u32 fpscr); | ||
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c new file mode 100644 index 000000000000..3aeedd2afc70 --- /dev/null +++ b/arch/arm/vfp/vfpmodule.c | |||
@@ -0,0 +1,288 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/vfp/vfpmodule.c | ||
3 | * | ||
4 | * Copyright (C) 2004 ARM Limited. | ||
5 | * Written by Deep Blue Solutions Limited. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/config.h> | ||
13 | #include <linux/types.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/signal.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <asm/vfp.h> | ||
19 | |||
20 | #include "vfpinstr.h" | ||
21 | #include "vfp.h" | ||
22 | |||
23 | /* | ||
24 | * Our undef handlers (in entry.S) | ||
25 | */ | ||
26 | void vfp_testing_entry(void); | ||
27 | void vfp_support_entry(void); | ||
28 | |||
/*
 * Handler currently installed for VFP instructions.  It starts out as
 * vfp_testing_entry and is switched to vfp_support_entry by vfp_init()
 * once a usable VFP has been detected.
 */
29 | void (*vfp_vector)(void) = vfp_testing_entry; | ||
/*
 * Pointer to a thread's VFP state; vfp_flush_thread() and
 * vfp_release_thread() reset it to NULL when it refers to the state
 * they are given.  It is also referenced from vfphw.S.
 */
30 | union vfp_state *last_VFP_context; | ||
31 | |||
32 | /* | ||
33 | * Dual-use variable. | ||
34 | * Used in startup: set to non-zero if VFP checks fail | ||
35 | * After startup, holds VFP architecture | ||
36 | */ | ||
37 | unsigned int VFP_arch; | ||
38 | |||
39 | /* | ||
40 | * Per-thread VFP initialisation. | ||
41 | */ | ||
42 | void vfp_flush_thread(union vfp_state *vfp) | ||
43 | { | ||
44 | memset(vfp, 0, sizeof(union vfp_state)); | ||
45 | |||
46 | vfp->hard.fpexc = FPEXC_ENABLE; | ||
47 | vfp->hard.fpscr = FPSCR_ROUND_NEAREST; | ||
48 | |||
49 | /* | ||
50 | * Disable VFP to ensure we initialise it first. | ||
51 | */ | ||
52 | fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_ENABLE); | ||
53 | |||
54 | /* | ||
55 | * Ensure we don't try to overwrite our newly initialised | ||
56 | * state information on the first fault. | ||
57 | */ | ||
58 | if (last_VFP_context == vfp) | ||
59 | last_VFP_context = NULL; | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Per-thread VFP cleanup. | ||
64 | */ | ||
65 | void vfp_release_thread(union vfp_state *vfp) | ||
66 | { | ||
67 | if (last_VFP_context == vfp) | ||
68 | last_VFP_context = NULL; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * Raise a SIGFPE for the current process. | ||
73 | * sicode describes the signal being raised. | ||
74 | */ | ||
75 | void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) | ||
76 | { | ||
77 | siginfo_t info; | ||
78 | |||
79 | memset(&info, 0, sizeof(info)); | ||
80 | |||
81 | info.si_signo = SIGFPE; | ||
82 | info.si_code = sicode; | ||
83 | info.si_addr = (void *)(instruction_pointer(regs) - 4); | ||
84 | |||
85 | /* | ||
86 | * This is the same as NWFPE, because it's not clear what | ||
87 | * this is used for | ||
88 | */ | ||
89 | current->thread.error_code = 0; | ||
90 | current->thread.trap_no = 6; | ||
91 | |||
92 | force_sig_info(SIGFPE, &info, current); | ||
93 | } | ||
94 | |||
95 | static void vfp_panic(char *reason) | ||
96 | { | ||
97 | int i; | ||
98 | |||
99 | printk(KERN_ERR "VFP: Error: %s\n", reason); | ||
100 | printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n", | ||
101 | fmrx(FPEXC), fmrx(FPSCR), fmrx(FPINST)); | ||
102 | for (i = 0; i < 32; i += 2) | ||
103 | printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n", | ||
104 | i, vfp_get_float(i), i+1, vfp_get_float(i+1)); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Process bitmask of exception conditions. | ||
109 | */ | ||
110 | static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs) | ||
111 | { | ||
112 | int si_code = 0; | ||
113 | |||
114 | pr_debug("VFP: raising exceptions %08x\n", exceptions); | ||
115 | |||
116 | if (exceptions == (u32)-1) { | ||
117 | vfp_panic("unhandled bounce"); | ||
118 | vfp_raise_sigfpe(0, regs); | ||
119 | return; | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * If any of the status flags are set, update the FPSCR. | ||
124 | * Comparison instructions always return at least one of | ||
125 | * these flags set. | ||
126 | */ | ||
127 | if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | ||
128 | fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); | ||
129 | |||
130 | fpscr |= exceptions; | ||
131 | |||
132 | fmxr(FPSCR, fpscr); | ||
133 | |||
134 | #define RAISE(stat,en,sig) \ | ||
135 | if (exceptions & stat && fpscr & en) \ | ||
136 | si_code = sig; | ||
137 | |||
138 | /* | ||
139 | * These are arranged in priority order, least to highest. | ||
140 | */ | ||
141 | RAISE(FPSCR_IXC, FPSCR_IXE, FPE_FLTRES); | ||
142 | RAISE(FPSCR_UFC, FPSCR_UFE, FPE_FLTUND); | ||
143 | RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF); | ||
144 | RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV); | ||
145 | |||
146 | if (si_code) | ||
147 | vfp_raise_sigfpe(si_code, regs); | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * Emulate a VFP instruction. | ||
152 | */ | ||
153 | static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) | ||
154 | { | ||
155 | u32 exceptions = (u32)-1; | ||
156 | |||
157 | pr_debug("VFP: emulate: INST=0x%08x SCR=0x%08x\n", inst, fpscr); | ||
158 | |||
159 | if (INST_CPRTDO(inst)) { | ||
160 | if (!INST_CPRT(inst)) { | ||
161 | /* | ||
162 | * CPDO | ||
163 | */ | ||
164 | if (vfp_single(inst)) { | ||
165 | exceptions = vfp_single_cpdo(inst, fpscr); | ||
166 | } else { | ||
167 | exceptions = vfp_double_cpdo(inst, fpscr); | ||
168 | } | ||
169 | } else { | ||
170 | /* | ||
171 | * A CPRT instruction can not appear in FPINST2, nor | ||
172 | * can it cause an exception. Therefore, we do not | ||
173 | * have to emulate it. | ||
174 | */ | ||
175 | } | ||
176 | } else { | ||
177 | /* | ||
178 | * A CPDT instruction can not appear in FPINST2, nor can | ||
179 | * it cause an exception. Therefore, we do not have to | ||
180 | * emulate it. | ||
181 | */ | ||
182 | } | ||
183 | return exceptions; | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Package up a bounce condition. | ||
188 | */ | ||
/*
 * VFP9_bounce - handle an instruction bounced by the VFP hardware.
 * @trigger: the instruction that triggered the bounce
 * @fpexc:   the FPEXC value at the time of the bounce
 * @regs:    register dump of the faulting context
 *
 * Entered from the assembly support code (vfphw.S branches here with
 * the trigger instruction, FPEXC and register dump pointer as the
 * three arguments).
 *
 * With inexact exceptions enabled (FPSCR_IXE) only the trigger
 * instruction is emulated, after stepping the saved PC past it.
 * Otherwise the instruction held in FPINST is emulated and, when
 * FPEXC_FPV2 is set, the one held in FPINST2 as well.  Exceptions
 * reported by the emulation are handed to vfp_raise_exceptions().
 */
189 | void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) | ||
190 | { | ||
191 | u32 fpscr, orig_fpscr, exceptions, inst; | ||
192 | |||
193 | pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); | ||
194 | |||
195 | /* | ||
196 | * Enable access to the VFP so we can handle the bounce. | ||
197 | */ | ||
198 | fmxr(FPEXC, fpexc & ~(FPEXC_EXCEPTION|FPEXC_INV|FPEXC_UFC|FPEXC_IOC)); | ||
199 | |||
/* Keep an unmodified copy: fpscr itself may get its length field rewritten below. */
200 | orig_fpscr = fpscr = fmrx(FPSCR); | ||
201 | |||
202 | /* | ||
203 | * If we are running with inexact exceptions enabled, we need to | ||
204 | * emulate the trigger instruction. Note that as we're emulating | ||
205 | * the trigger instruction, we need to increment PC. | ||
206 | */ | ||
207 | if (fpscr & FPSCR_IXE) { | ||
208 | regs->ARM_pc += 4; | ||
209 | goto emulate; | ||
210 | } | ||
211 | |||
212 | barrier(); | ||
213 | |||
214 | /* | ||
215 | * Modify fpscr to indicate the number of iterations remaining | ||
216 | */ | ||
217 | if (fpexc & FPEXC_EXCEPTION) { | ||
218 | u32 len; | ||
219 | |||
220 | len = fpexc + (1 << FPEXC_LENGTH_BIT); | ||
221 | |||
222 | fpscr &= ~FPSCR_LENGTH_MASK; | ||
223 | fpscr |= (len & FPEXC_LENGTH_MASK) << (FPSCR_LENGTH_BIT - FPEXC_LENGTH_BIT); | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Handle the first FP instruction. We used to take note of the | ||
228 | * FPEXC bounce reason, but this appears to be unreliable. | ||
229 | * Emulate the bounced instruction instead. | ||
230 | */ | ||
231 | inst = fmrx(FPINST); | ||
232 | exceptions = vfp_emulate_instruction(inst, fpscr, regs); | ||
/* Emulation used the adjusted fpscr; exceptions are merged into the original value. */
233 | if (exceptions) | ||
234 | vfp_raise_exceptions(exceptions, inst, orig_fpscr, regs); | ||
235 | |||
236 | /* | ||
237 | * If there isn't a second FP instruction, exit now. | ||
238 | */ | ||
239 | if (!(fpexc & FPEXC_FPV2)) | ||
240 | return; | ||
241 | |||
242 | /* | ||
243 | * The barrier() here prevents fpinst2 being read | ||
244 | * before the condition above. | ||
245 | */ | ||
246 | barrier(); | ||
247 | trigger = fmrx(FPINST2); | ||
248 | fpscr = fmrx(FPSCR); | ||
249 | |||
/*
 * Reached either by the goto above (trigger is the caller's
 * instruction) or by falling through (trigger is FPINST2 and fpscr
 * has just been re-read).
 */
250 | emulate: | ||
251 | exceptions = vfp_emulate_instruction(trigger, fpscr, regs); | ||
252 | if (exceptions) | ||
253 | vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * VFP support code initialisation. | ||
258 | */ | ||
259 | static int __init vfp_init(void) | ||
260 | { | ||
261 | unsigned int vfpsid; | ||
262 | |||
263 | /* | ||
264 | * First check that there is a VFP that we can use. | ||
265 | * The handler is already setup to just log calls, so | ||
266 | * we just need to read the VFPSID register. | ||
267 | */ | ||
268 | vfpsid = fmrx(FPSID); | ||
269 | |||
270 | printk(KERN_INFO "VFP support v0.3: "); | ||
271 | if (VFP_arch) { | ||
272 | printk("not present\n"); | ||
273 | } else if (vfpsid & FPSID_NODOUBLE) { | ||
274 | printk("no double precision support\n"); | ||
275 | } else { | ||
276 | VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; /* Extract the architecture version */ | ||
277 | printk("implementor %02x architecture %d part %02x variant %x rev %x\n", | ||
278 | (vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT, | ||
279 | (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT, | ||
280 | (vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT, | ||
281 | (vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT, | ||
282 | (vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT); | ||
283 | vfp_vector = vfp_support_entry; | ||
284 | } | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | late_initcall(vfp_init); | ||
diff --git a/arch/arm/vfp/vfpsingle.c b/arch/arm/vfp/vfpsingle.c new file mode 100644 index 000000000000..6849fe35cb2e --- /dev/null +++ b/arch/arm/vfp/vfpsingle.c | |||
@@ -0,0 +1,1224 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/vfp/vfpsingle.c | ||
3 | * | ||
4 | * This code is derived in part from John R. Housers softfloat library, which | ||
5 | * carries the following notice: | ||
6 | * | ||
7 | * =========================================================================== | ||
8 | * This C source file is part of the SoftFloat IEC/IEEE Floating-point | ||
9 | * Arithmetic Package, Release 2. | ||
10 | * | ||
11 | * Written by John R. Hauser. This work was made possible in part by the | ||
12 | * International Computer Science Institute, located at Suite 600, 1947 Center | ||
13 | * Street, Berkeley, California 94704. Funding was partially provided by the | ||
14 | * National Science Foundation under grant MIP-9311980. The original version | ||
15 | * of this code was written as part of a project to build a fixed-point vector | ||
16 | * processor in collaboration with the University of California at Berkeley, | ||
17 | * overseen by Profs. Nelson Morgan and John Wawrzynek. More information | ||
18 | * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | ||
19 | * arithmetic/softfloat.html'. | ||
20 | * | ||
21 | * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | ||
22 | * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | ||
23 | * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | ||
24 | * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | ||
25 | * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | ||
26 | * | ||
27 | * Derivative works are acceptable, even for commercial purposes, so long as | ||
28 | * (1) they include prominent notice that the work is derivative, and (2) they | ||
29 | * include prominent notice akin to these three paragraphs for those parts of | ||
30 | * this code that are retained. | ||
31 | * =========================================================================== | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/bitops.h> | ||
35 | #include <asm/ptrace.h> | ||
36 | #include <asm/vfp.h> | ||
37 | |||
38 | #include "vfpinstr.h" | ||
39 | #include "vfp.h" | ||
40 | |||
41 | static struct vfp_single vfp_single_default_qnan = { | ||
42 | .exponent = 255, | ||
43 | .sign = 0, | ||
44 | .significand = VFP_SINGLE_SIGNIFICAND_QNAN, | ||
45 | }; | ||
46 | |||
47 | static void vfp_single_dump(const char *str, struct vfp_single *s) | ||
48 | { | ||
49 | pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", | ||
50 | str, s->sign != 0, s->exponent, s->significand); | ||
51 | } | ||
52 | |||
53 | static void vfp_single_normalise_denormal(struct vfp_single *vs) | ||
54 | { | ||
55 | int bits = 31 - fls(vs->significand); | ||
56 | |||
57 | vfp_single_dump("normalise_denormal: in", vs); | ||
58 | |||
59 | if (bits) { | ||
60 | vs->exponent -= bits - 1; | ||
61 | vs->significand <<= bits; | ||
62 | } | ||
63 | |||
64 | vfp_single_dump("normalise_denormal: out", vs); | ||
65 | } | ||
66 | |||
/*
 * vfp_single_normaliseround - normalise, round and store a result.
 * @sd:         destination single-precision register number
 * @vs:         unpacked result; modified in place
 * @fpscr:      FPSCR value supplying the rounding mode
 * @exceptions: exception flags accumulated so far
 * @func:       caller name, used only in the debug output
 *
 * The packed value is written to register @sd with vfp_put_float().
 * Returns @exceptions plus any inexact/overflow/underflow flags raised
 * here, with VFP_NAN_FLAG masked out.
 *
 * Without DEBUG the function is built as __vfp_single_normaliseround()
 * and the vfp_single_normaliseround() macro drops the @func argument.
 */
67 | #ifndef DEBUG | ||
68 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) | ||
69 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) | ||
70 | #else | ||
71 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) | ||
72 | #endif | ||
73 | { | ||
74 | u32 significand, incr, rmode; | ||
75 | int exponent, shift, underflow; | ||
76 | |||
77 | vfp_single_dump("pack: in", vs); | ||
78 | |||
79 | /* | ||
80 | * Infinities and NaNs are a special case. | ||
81 | */ | ||
82 | if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) | ||
83 | goto pack; | ||
84 | |||
85 | /* | ||
86 | * Special-case zero. | ||
87 | */ | ||
88 | if (vs->significand == 0) { | ||
89 | vs->exponent = 0; | ||
90 | goto pack; | ||
91 | } | ||
92 | |||
93 | exponent = vs->exponent; | ||
94 | significand = vs->significand; | ||
95 | |||
96 | /* | ||
97 | * Normalise first. Note that we shift the significand up to | ||
98 | * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least | ||
99 | * significant bit. | ||
100 | */ | ||
101 | shift = 32 - fls(significand); | ||
102 | if (shift < 32 && shift) { | ||
103 | exponent -= shift; | ||
104 | significand <<= shift; | ||
105 | } | ||
106 | |||
107 | #ifdef DEBUG | ||
108 | vs->exponent = exponent; | ||
109 | vs->significand = significand; | ||
110 | vfp_single_dump("pack: normalised", vs); | ||
111 | #endif | ||
112 | |||
113 | /* | ||
114 | * Tiny number? | ||
115 | */ | ||
116 | underflow = exponent < 0; | ||
117 | if (underflow) { | ||
118 | significand = vfp_shiftright32jamming(significand, -exponent); | ||
119 | exponent = 0; | ||
120 | #ifdef DEBUG | ||
121 | vs->exponent = exponent; | ||
122 | vs->significand = significand; | ||
123 | vfp_single_dump("pack: tiny number", vs); | ||
124 | #endif | ||
/* No bits below the result LSB: the tiny result is exact, so no underflow. */
125 | if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) | ||
126 | underflow = 0; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Select rounding increment. | ||
131 | */ | ||
132 | incr = 0; | ||
133 | rmode = fpscr & FPSCR_RMODE_MASK; | ||
134 | |||
135 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
136 | incr = 1 << VFP_SINGLE_LOW_BITS; | ||
137 | if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) | ||
138 | incr -= 1; | ||
139 | } else if (rmode == FPSCR_ROUND_TOZERO) { | ||
140 | incr = 0; | ||
141 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) | ||
142 | incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; | ||
143 | |||
144 | pr_debug("VFP: rounding increment = 0x%08x\n", incr); | ||
145 | |||
146 | /* | ||
147 | * Is our rounding going to overflow? | ||
148 | */ | ||
149 | if ((significand + incr) < significand) { | ||
150 | exponent += 1; | ||
151 | significand = (significand >> 1) | (significand & 1); | ||
152 | incr >>= 1; | ||
153 | #ifdef DEBUG | ||
154 | vs->exponent = exponent; | ||
155 | vs->significand = significand; | ||
156 | vfp_single_dump("pack: overflow", vs); | ||
157 | #endif | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * If any of the low bits (which will be shifted out of the | ||
162 | * number) are non-zero, the result is inexact. | ||
163 | */ | ||
164 | if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) | ||
165 | exceptions |= FPSCR_IXC; | ||
166 | |||
167 | /* | ||
168 | * Do our rounding. | ||
169 | */ | ||
170 | significand += incr; | ||
171 | |||
172 | /* | ||
173 | * Infinity? | ||
174 | */ | ||
175 | if (exponent >= 254) { | ||
176 | exceptions |= FPSCR_OFC | FPSCR_IXC; | ||
/* A zero increment means rounding is towards zero: saturate instead of returning infinity. */
177 | if (incr == 0) { | ||
178 | vs->exponent = 253; | ||
179 | vs->significand = 0x7fffffff; | ||
180 | } else { | ||
181 | vs->exponent = 255; /* infinity */ | ||
182 | vs->significand = 0; | ||
183 | } | ||
184 | } else { | ||
185 | if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) | ||
186 | exponent = 0; | ||
187 | if (exponent || significand > 0x80000000) | ||
188 | underflow = 0; | ||
189 | if (underflow) | ||
190 | exceptions |= FPSCR_UFC; | ||
191 | vs->exponent = exponent; | ||
192 | vs->significand = significand >> 1; | ||
193 | } | ||
194 | |||
195 | pack: | ||
196 | vfp_single_dump("pack: final", vs); | ||
197 | { | ||
198 | s32 d = vfp_single_pack(vs); | ||
199 | pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, | ||
200 | sd, d, exceptions); | ||
201 | vfp_put_float(sd, d); | ||
202 | } | ||
203 | |||
204 | return exceptions & ~VFP_NAN_FLAG; | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Propagate the NaN, setting exceptions if it is signalling. | ||
209 | * 'n' is always a NaN. 'm' may be a number, NaN or infinity. | ||
210 | */ | ||
211 | static u32 | ||
212 | vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, | ||
213 | struct vfp_single *vsm, u32 fpscr) | ||
214 | { | ||
215 | struct vfp_single *nan; | ||
216 | int tn, tm = 0; | ||
217 | |||
218 | tn = vfp_single_type(vsn); | ||
219 | |||
220 | if (vsm) | ||
221 | tm = vfp_single_type(vsm); | ||
222 | |||
223 | if (fpscr & FPSCR_DEFAULT_NAN) | ||
224 | /* | ||
225 | * Default NaN mode - always returns a quiet NaN | ||
226 | */ | ||
227 | nan = &vfp_single_default_qnan; | ||
228 | else { | ||
229 | /* | ||
230 | * Contemporary mode - select the first signalling | ||
231 | * NAN, or if neither are signalling, the first | ||
232 | * quiet NAN. | ||
233 | */ | ||
234 | if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) | ||
235 | nan = vsn; | ||
236 | else | ||
237 | nan = vsm; | ||
238 | /* | ||
239 | * Make the NaN quiet. | ||
240 | */ | ||
241 | nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | ||
242 | } | ||
243 | |||
244 | *vsd = *nan; | ||
245 | |||
246 | /* | ||
247 | * If one was a signalling NAN, raise invalid operation. | ||
248 | */ | ||
249 | return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; | ||
250 | } | ||
251 | |||
252 | |||
253 | /* | ||
254 | * Extended operations | ||
255 | */ | ||
256 | static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) | ||
257 | { | ||
258 | vfp_put_float(sd, vfp_single_packed_abs(m)); | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) | ||
263 | { | ||
264 | vfp_put_float(sd, m); | ||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) | ||
269 | { | ||
270 | vfp_put_float(sd, vfp_single_packed_negate(m)); | ||
271 | return 0; | ||
272 | } | ||
273 | |||
/*
 * Adjustment tables for vfp_estimate_sqrt_significand().  Both have 16
 * entries and are indexed by four bits taken from the top of the
 * significand; sqrt_oddadjust is used when the exponent is odd,
 * sqrt_evenadjust when it is even.
 */
274 | static const u16 sqrt_oddadjust[] = { | ||
275 | 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, | ||
276 | 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 | ||
277 | }; | ||
278 | |||
279 | static const u16 sqrt_evenadjust[] = { | ||
280 | 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, | ||
281 | 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 | ||
282 | }; | ||
283 | |||
284 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) | ||
285 | { | ||
286 | int index; | ||
287 | u32 z, a; | ||
288 | |||
289 | if ((significand & 0xc0000000) != 0x40000000) { | ||
290 | printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n"); | ||
291 | } | ||
292 | |||
293 | a = significand << 1; | ||
294 | index = (a >> 27) & 15; | ||
295 | if (exponent & 1) { | ||
296 | z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; | ||
297 | z = ((a / z) << 14) + (z << 15); | ||
298 | a >>= 1; | ||
299 | } else { | ||
300 | z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; | ||
301 | z = a / z + z; | ||
302 | z = (z >= 0x20000) ? 0xffff8000 : (z << 15); | ||
303 | if (z <= a) | ||
304 | return (s32)a >> 1; | ||
305 | } | ||
306 | return (u32)(((u64)a << 31) / z) + (z >> 1); | ||
307 | } | ||
308 | |||
/*
 * FSQRTS: store the square root of packed float 'm' in register 'sd'
 * and return the resulting exception flags.
 *
 * NaNs are propagated, +infinity and +/-0 are returned unchanged, and
 * any other negative input yields the default quiet NaN with
 * FPSCR_IOC.  Everything else is estimated, corrected and then rounded
 * by vfp_single_normaliseround().
 *
 * Note that the labels sqrt_copy and sqrt_invalid sit inside the
 * NaN/infinity branch and are also jumped to from the code below it.
 */
309 | static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr) | ||
310 | { | ||
311 | struct vfp_single vsm, vsd; | ||
312 | int ret, tm; | ||
313 | |||
314 | vfp_single_unpack(&vsm, m); | ||
315 | tm = vfp_single_type(&vsm); | ||
316 | if (tm & (VFP_NAN|VFP_INFINITY)) { | ||
317 | struct vfp_single *vsp = &vsd; | ||
318 | |||
319 | if (tm & VFP_NAN) | ||
320 | ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); | ||
321 | else if (vsm.sign == 0) { | ||
322 | sqrt_copy: | ||
323 | vsp = &vsm; | ||
324 | ret = 0; | ||
325 | } else { | ||
326 | sqrt_invalid: | ||
327 | vsp = &vfp_single_default_qnan; | ||
328 | ret = FPSCR_IOC; | ||
329 | } | ||
330 | vfp_put_float(sd, vfp_single_pack(vsp)); | ||
331 | return ret; | ||
332 | } | ||
333 | |||
334 | /* | ||
335 | * sqrt(+/- 0) == +/- 0 | ||
336 | */ | ||
337 | if (tm & VFP_ZERO) | ||
338 | goto sqrt_copy; | ||
339 | |||
340 | /* | ||
341 | * Normalise a denormalised number | ||
342 | */ | ||
343 | if (tm & VFP_DENORMAL) | ||
344 | vfp_single_normalise_denormal(&vsm); | ||
345 | |||
346 | /* | ||
347 | * sqrt(<0) = invalid | ||
348 | */ | ||
349 | if (vsm.sign) | ||
350 | goto sqrt_invalid; | ||
351 | |||
352 | vfp_single_dump("sqrt", &vsm); | ||
353 | |||
354 | /* | ||
355 | * Estimate the square root. | ||
356 | */ | ||
357 | vsd.sign = 0; | ||
358 | vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; | ||
359 | vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; | ||
360 | |||
361 | vfp_single_dump("sqrt estimate", &vsd); | ||
362 | |||
363 | /* | ||
364 | * And now adjust. | ||
365 | */ | ||
366 | if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { | ||
367 | if (vsd.significand < 2) { | ||
368 | vsd.significand = 0xffffffff; | ||
369 | } else { | ||
370 | u64 term; | ||
371 | s64 rem; | ||
372 | vsm.significand <<= !(vsm.exponent & 1); | ||
373 | term = (u64)vsd.significand * vsd.significand; | ||
374 | rem = ((u64)vsm.significand << 32) - term; | ||
375 | |||
376 | pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); | ||
377 | |||
/* Step the estimate down until its square no longer exceeds the operand. */
378 | while (rem < 0) { | ||
379 | vsd.significand -= 1; | ||
380 | rem += ((u64)vsd.significand << 1) | 1; | ||
381 | } | ||
/* Sticky bit: record a non-zero remainder so that rounding sees the result as inexact. */
382 | vsd.significand |= rem != 0; | ||
383 | } | ||
384 | } | ||
385 | vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); | ||
386 | |||
387 | return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt"); | ||
388 | } | ||
389 | |||
390 | /* | ||
391 | * Equal := ZC | ||
392 | * Less than := N | ||
393 | * Greater than := C | ||
394 | * Unordered := CV | ||
395 | */ | ||
396 | static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr) | ||
397 | { | ||
398 | s32 d; | ||
399 | u32 ret = 0; | ||
400 | |||
401 | d = vfp_get_float(sd); | ||
402 | if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { | ||
403 | ret |= FPSCR_C | FPSCR_V; | ||
404 | if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | ||
405 | /* | ||
406 | * Signalling NaN, or signalling on quiet NaN | ||
407 | */ | ||
408 | ret |= FPSCR_IOC; | ||
409 | } | ||
410 | |||
411 | if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { | ||
412 | ret |= FPSCR_C | FPSCR_V; | ||
413 | if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | ||
414 | /* | ||
415 | * Signalling NaN, or signalling on quiet NaN | ||
416 | */ | ||
417 | ret |= FPSCR_IOC; | ||
418 | } | ||
419 | |||
420 | if (ret == 0) { | ||
421 | if (d == m || vfp_single_packed_abs(d | m) == 0) { | ||
422 | /* | ||
423 | * equal | ||
424 | */ | ||
425 | ret |= FPSCR_Z | FPSCR_C; | ||
426 | } else if (vfp_single_packed_sign(d ^ m)) { | ||
427 | /* | ||
428 | * different signs | ||
429 | */ | ||
430 | if (vfp_single_packed_sign(d)) | ||
431 | /* | ||
432 | * d is negative, so d < m | ||
433 | */ | ||
434 | ret |= FPSCR_N; | ||
435 | else | ||
436 | /* | ||
437 | * d is positive, so d > m | ||
438 | */ | ||
439 | ret |= FPSCR_C; | ||
440 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { | ||
441 | /* | ||
442 | * d < m | ||
443 | */ | ||
444 | ret |= FPSCR_N; | ||
445 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { | ||
446 | /* | ||
447 | * d > m | ||
448 | */ | ||
449 | ret |= FPSCR_C; | ||
450 | } | ||
451 | } | ||
452 | return ret; | ||
453 | } | ||
454 | |||
455 | static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) | ||
456 | { | ||
457 | return vfp_compare(sd, 0, m, fpscr); | ||
458 | } | ||
459 | |||
460 | static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) | ||
461 | { | ||
462 | return vfp_compare(sd, 1, m, fpscr); | ||
463 | } | ||
464 | |||
465 | static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) | ||
466 | { | ||
467 | return vfp_compare(sd, 0, 0, fpscr); | ||
468 | } | ||
469 | |||
470 | static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) | ||
471 | { | ||
472 | return vfp_compare(sd, 1, 0, fpscr); | ||
473 | } | ||
474 | |||
475 | static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr) | ||
476 | { | ||
477 | struct vfp_single vsm; | ||
478 | struct vfp_double vdd; | ||
479 | int tm; | ||
480 | u32 exceptions = 0; | ||
481 | |||
482 | vfp_single_unpack(&vsm, m); | ||
483 | |||
484 | tm = vfp_single_type(&vsm); | ||
485 | |||
486 | /* | ||
487 | * If we have a signalling NaN, signal invalid operation. | ||
488 | */ | ||
489 | if (tm == VFP_SNAN) | ||
490 | exceptions = FPSCR_IOC; | ||
491 | |||
492 | if (tm & VFP_DENORMAL) | ||
493 | vfp_single_normalise_denormal(&vsm); | ||
494 | |||
495 | vdd.sign = vsm.sign; | ||
496 | vdd.significand = (u64)vsm.significand << 32; | ||
497 | |||
498 | /* | ||
499 | * If we have an infinity or NaN, the exponent must be 2047. | ||
500 | */ | ||
501 | if (tm & (VFP_INFINITY|VFP_NAN)) { | ||
502 | vdd.exponent = 2047; | ||
503 | if (tm & VFP_NAN) | ||
504 | vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | ||
505 | goto pack_nan; | ||
506 | } else if (tm & VFP_ZERO) | ||
507 | vdd.exponent = 0; | ||
508 | else | ||
509 | vdd.exponent = vsm.exponent + (1023 - 127); | ||
510 | |||
511 | /* | ||
512 | * Technically, if bit 0 of dd is set, this is an invalid | ||
513 | * instruction. However, we ignore this for efficiency. | ||
514 | */ | ||
515 | return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd"); | ||
516 | |||
517 | pack_nan: | ||
518 | vfp_put_double(dd, vfp_double_pack(&vdd)); | ||
519 | return exceptions; | ||
520 | } | ||
521 | |||
522 | static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) | ||
523 | { | ||
524 | struct vfp_single vs; | ||
525 | |||
526 | vs.sign = 0; | ||
527 | vs.exponent = 127 + 31 - 1; | ||
528 | vs.significand = (u32)m; | ||
529 | |||
530 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); | ||
531 | } | ||
532 | |||
533 | static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) | ||
534 | { | ||
535 | struct vfp_single vs; | ||
536 | |||
537 | vs.sign = (m & 0x80000000) >> 16; | ||
538 | vs.exponent = 127 + 31 - 1; | ||
539 | vs.significand = vs.sign ? -m : m; | ||
540 | |||
541 | return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); | ||
542 | } | ||
543 | |||
544 | static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr) | ||
545 | { | ||
546 | struct vfp_single vsm; | ||
547 | u32 d, exceptions = 0; | ||
548 | int rmode = fpscr & FPSCR_RMODE_MASK; | ||
549 | int tm; | ||
550 | |||
551 | vfp_single_unpack(&vsm, m); | ||
552 | vfp_single_dump("VSM", &vsm); | ||
553 | |||
554 | /* | ||
555 | * Do we have a denormalised number? | ||
556 | */ | ||
557 | tm = vfp_single_type(&vsm); | ||
558 | if (tm & VFP_DENORMAL) | ||
559 | exceptions |= FPSCR_IDC; | ||
560 | |||
561 | if (tm & VFP_NAN) | ||
562 | vsm.sign = 0; | ||
563 | |||
564 | if (vsm.exponent >= 127 + 32) { | ||
565 | d = vsm.sign ? 0 : 0xffffffff; | ||
566 | exceptions = FPSCR_IOC; | ||
567 | } else if (vsm.exponent >= 127 - 1) { | ||
568 | int shift = 127 + 31 - vsm.exponent; | ||
569 | u32 rem, incr = 0; | ||
570 | |||
571 | /* | ||
572 | * 2^0 <= m < 2^32-2^8 | ||
573 | */ | ||
574 | d = (vsm.significand << 1) >> shift; | ||
575 | rem = vsm.significand << (33 - shift); | ||
576 | |||
577 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
578 | incr = 0x80000000; | ||
579 | if ((d & 1) == 0) | ||
580 | incr -= 1; | ||
581 | } else if (rmode == FPSCR_ROUND_TOZERO) { | ||
582 | incr = 0; | ||
583 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { | ||
584 | incr = ~0; | ||
585 | } | ||
586 | |||
587 | if ((rem + incr) < rem) { | ||
588 | if (d < 0xffffffff) | ||
589 | d += 1; | ||
590 | else | ||
591 | exceptions |= FPSCR_IOC; | ||
592 | } | ||
593 | |||
594 | if (d && vsm.sign) { | ||
595 | d = 0; | ||
596 | exceptions |= FPSCR_IOC; | ||
597 | } else if (rem) | ||
598 | exceptions |= FPSCR_IXC; | ||
599 | } else { | ||
600 | d = 0; | ||
601 | if (vsm.exponent | vsm.significand) { | ||
602 | exceptions |= FPSCR_IXC; | ||
603 | if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) | ||
604 | d = 1; | ||
605 | else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { | ||
606 | d = 0; | ||
607 | exceptions |= FPSCR_IOC; | ||
608 | } | ||
609 | } | ||
610 | } | ||
611 | |||
612 | pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | ||
613 | |||
614 | vfp_put_float(sd, d); | ||
615 | |||
616 | return exceptions; | ||
617 | } | ||
618 | |||
619 | static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) | ||
620 | { | ||
621 | return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); | ||
622 | } | ||
623 | |||
624 | static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr) | ||
625 | { | ||
626 | struct vfp_single vsm; | ||
627 | u32 d, exceptions = 0; | ||
628 | int rmode = fpscr & FPSCR_RMODE_MASK; | ||
629 | |||
630 | vfp_single_unpack(&vsm, m); | ||
631 | vfp_single_dump("VSM", &vsm); | ||
632 | |||
633 | /* | ||
634 | * Do we have a denormalised number? | ||
635 | */ | ||
636 | if (vfp_single_type(&vsm) & VFP_DENORMAL) | ||
637 | exceptions |= FPSCR_IDC; | ||
638 | |||
639 | if (vsm.exponent >= 127 + 32) { | ||
640 | /* | ||
641 | * m >= 2^31-2^7: invalid | ||
642 | */ | ||
643 | d = 0x7fffffff; | ||
644 | if (vsm.sign) | ||
645 | d = ~d; | ||
646 | exceptions |= FPSCR_IOC; | ||
647 | } else if (vsm.exponent >= 127 - 1) { | ||
648 | int shift = 127 + 31 - vsm.exponent; | ||
649 | u32 rem, incr = 0; | ||
650 | |||
651 | /* 2^0 <= m <= 2^31-2^7 */ | ||
652 | d = (vsm.significand << 1) >> shift; | ||
653 | rem = vsm.significand << (33 - shift); | ||
654 | |||
655 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
656 | incr = 0x80000000; | ||
657 | if ((d & 1) == 0) | ||
658 | incr -= 1; | ||
659 | } else if (rmode == FPSCR_ROUND_TOZERO) { | ||
660 | incr = 0; | ||
661 | } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { | ||
662 | incr = ~0; | ||
663 | } | ||
664 | |||
665 | if ((rem + incr) < rem && d < 0xffffffff) | ||
666 | d += 1; | ||
667 | if (d > 0x7fffffff + (vsm.sign != 0)) { | ||
668 | d = 0x7fffffff + (vsm.sign != 0); | ||
669 | exceptions |= FPSCR_IOC; | ||
670 | } else if (rem) | ||
671 | exceptions |= FPSCR_IXC; | ||
672 | |||
673 | if (vsm.sign) | ||
674 | d = -d; | ||
675 | } else { | ||
676 | d = 0; | ||
677 | if (vsm.exponent | vsm.significand) { | ||
678 | exceptions |= FPSCR_IXC; | ||
679 | if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) | ||
680 | d = 1; | ||
681 | else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) | ||
682 | d = -1; | ||
683 | } | ||
684 | } | ||
685 | |||
686 | pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); | ||
687 | |||
688 | vfp_put_float(sd, (s32)d); | ||
689 | |||
690 | return exceptions; | ||
691 | } | ||
692 | |||
693 | static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) | ||
694 | { | ||
695 | return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); | ||
696 | } | ||
697 | |||
698 | static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = { | ||
699 | [FEXT_TO_IDX(FEXT_FCPY)] = vfp_single_fcpy, | ||
700 | [FEXT_TO_IDX(FEXT_FABS)] = vfp_single_fabs, | ||
701 | [FEXT_TO_IDX(FEXT_FNEG)] = vfp_single_fneg, | ||
702 | [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_single_fsqrt, | ||
703 | [FEXT_TO_IDX(FEXT_FCMP)] = vfp_single_fcmp, | ||
704 | [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_single_fcmpe, | ||
705 | [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_single_fcmpz, | ||
706 | [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_single_fcmpez, | ||
707 | [FEXT_TO_IDX(FEXT_FCVT)] = vfp_single_fcvtd, | ||
708 | [FEXT_TO_IDX(FEXT_FUITO)] = vfp_single_fuito, | ||
709 | [FEXT_TO_IDX(FEXT_FSITO)] = vfp_single_fsito, | ||
710 | [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_single_ftoui, | ||
711 | [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_single_ftouiz, | ||
712 | [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_single_ftosi, | ||
713 | [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_single_ftosiz, | ||
714 | }; | ||
715 | |||
716 | |||
717 | |||
718 | |||
719 | |||
720 | static u32 | ||
721 | vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, | ||
722 | struct vfp_single *vsm, u32 fpscr) | ||
723 | { | ||
724 | struct vfp_single *vsp; | ||
725 | u32 exceptions = 0; | ||
726 | int tn, tm; | ||
727 | |||
728 | tn = vfp_single_type(vsn); | ||
729 | tm = vfp_single_type(vsm); | ||
730 | |||
731 | if (tn & tm & VFP_INFINITY) { | ||
732 | /* | ||
733 | * Two infinities. Are they different signs? | ||
734 | */ | ||
735 | if (vsn->sign ^ vsm->sign) { | ||
736 | /* | ||
737 | * different signs -> invalid | ||
738 | */ | ||
739 | exceptions = FPSCR_IOC; | ||
740 | vsp = &vfp_single_default_qnan; | ||
741 | } else { | ||
742 | /* | ||
743 | * same signs -> valid | ||
744 | */ | ||
745 | vsp = vsn; | ||
746 | } | ||
747 | } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { | ||
748 | /* | ||
749 | * One infinity and one number -> infinity | ||
750 | */ | ||
751 | vsp = vsn; | ||
752 | } else { | ||
753 | /* | ||
754 | * 'n' is a NaN of some type | ||
755 | */ | ||
756 | return vfp_propagate_nan(vsd, vsn, vsm, fpscr); | ||
757 | } | ||
758 | *vsd = *vsp; | ||
759 | return exceptions; | ||
760 | } | ||
761 | |||
762 | static u32 | ||
763 | vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, | ||
764 | struct vfp_single *vsm, u32 fpscr) | ||
765 | { | ||
766 | u32 exp_diff, m_sig; | ||
767 | |||
768 | if (vsn->significand & 0x80000000 || | ||
769 | vsm->significand & 0x80000000) { | ||
770 | pr_info("VFP: bad FP values in %s\n", __func__); | ||
771 | vfp_single_dump("VSN", vsn); | ||
772 | vfp_single_dump("VSM", vsm); | ||
773 | } | ||
774 | |||
775 | /* | ||
776 | * Ensure that 'n' is the largest magnitude number. Note that | ||
777 | * if 'n' and 'm' have equal exponents, we do not swap them. | ||
778 | * This ensures that NaN propagation works correctly. | ||
779 | */ | ||
780 | if (vsn->exponent < vsm->exponent) { | ||
781 | struct vfp_single *t = vsn; | ||
782 | vsn = vsm; | ||
783 | vsm = t; | ||
784 | } | ||
785 | |||
786 | /* | ||
787 | * Is 'n' an infinity or a NaN? Note that 'm' may be a number, | ||
788 | * infinity or a NaN here. | ||
789 | */ | ||
790 | if (vsn->exponent == 255) | ||
791 | return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); | ||
792 | |||
793 | /* | ||
794 | * We have two proper numbers, where 'vsn' is the larger magnitude. | ||
795 | * | ||
796 | * Copy 'n' to 'd' before doing the arithmetic. | ||
797 | */ | ||
798 | *vsd = *vsn; | ||
799 | |||
800 | /* | ||
801 | * Align both numbers. | ||
802 | */ | ||
803 | exp_diff = vsn->exponent - vsm->exponent; | ||
804 | m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); | ||
805 | |||
806 | /* | ||
807 | * If the signs are different, we are really subtracting. | ||
808 | */ | ||
809 | if (vsn->sign ^ vsm->sign) { | ||
810 | m_sig = vsn->significand - m_sig; | ||
811 | if ((s32)m_sig < 0) { | ||
812 | vsd->sign = vfp_sign_negate(vsd->sign); | ||
813 | m_sig = -m_sig; | ||
814 | } else if (m_sig == 0) { | ||
815 | vsd->sign = (fpscr & FPSCR_RMODE_MASK) == | ||
816 | FPSCR_ROUND_MINUSINF ? 0x8000 : 0; | ||
817 | } | ||
818 | } else { | ||
819 | m_sig = vsn->significand + m_sig; | ||
820 | } | ||
821 | vsd->significand = m_sig; | ||
822 | |||
823 | return 0; | ||
824 | } | ||
825 | |||
826 | static u32 | ||
827 | vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) | ||
828 | { | ||
829 | vfp_single_dump("VSN", vsn); | ||
830 | vfp_single_dump("VSM", vsm); | ||
831 | |||
832 | /* | ||
833 | * Ensure that 'n' is the largest magnitude number. Note that | ||
834 | * if 'n' and 'm' have equal exponents, we do not swap them. | ||
835 | * This ensures that NaN propagation works correctly. | ||
836 | */ | ||
837 | if (vsn->exponent < vsm->exponent) { | ||
838 | struct vfp_single *t = vsn; | ||
839 | vsn = vsm; | ||
840 | vsm = t; | ||
841 | pr_debug("VFP: swapping M <-> N\n"); | ||
842 | } | ||
843 | |||
844 | vsd->sign = vsn->sign ^ vsm->sign; | ||
845 | |||
846 | /* | ||
847 | * If 'n' is an infinity or NaN, handle it. 'm' may be anything. | ||
848 | */ | ||
849 | if (vsn->exponent == 255) { | ||
850 | if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) | ||
851 | return vfp_propagate_nan(vsd, vsn, vsm, fpscr); | ||
852 | if ((vsm->exponent | vsm->significand) == 0) { | ||
853 | *vsd = vfp_single_default_qnan; | ||
854 | return FPSCR_IOC; | ||
855 | } | ||
856 | vsd->exponent = vsn->exponent; | ||
857 | vsd->significand = 0; | ||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | /* | ||
862 | * If 'm' is zero, the result is always zero. In this case, | ||
863 | * 'n' may be zero or a number, but it doesn't matter which. | ||
864 | */ | ||
865 | if ((vsm->exponent | vsm->significand) == 0) { | ||
866 | vsd->exponent = 0; | ||
867 | vsd->significand = 0; | ||
868 | return 0; | ||
869 | } | ||
870 | |||
871 | /* | ||
872 | * We add 2 to the destination exponent for the same reason as | ||
873 | * the addition case - though this time we have +1 from each | ||
874 | * input operand. | ||
875 | */ | ||
876 | vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; | ||
877 | vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); | ||
878 | |||
879 | vfp_single_dump("VSD", vsd); | ||
880 | return 0; | ||
881 | } | ||
882 | |||
883 | #define NEG_MULTIPLY (1 << 0) | ||
884 | #define NEG_SUBTRACT (1 << 1) | ||
885 | |||
886 | static u32 | ||
887 | vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) | ||
888 | { | ||
889 | struct vfp_single vsd, vsp, vsn, vsm; | ||
890 | u32 exceptions; | ||
891 | s32 v; | ||
892 | |||
893 | v = vfp_get_float(sn); | ||
894 | pr_debug("VFP: s%u = %08x\n", sn, v); | ||
895 | vfp_single_unpack(&vsn, v); | ||
896 | if (vsn.exponent == 0 && vsn.significand) | ||
897 | vfp_single_normalise_denormal(&vsn); | ||
898 | |||
899 | vfp_single_unpack(&vsm, m); | ||
900 | if (vsm.exponent == 0 && vsm.significand) | ||
901 | vfp_single_normalise_denormal(&vsm); | ||
902 | |||
903 | exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); | ||
904 | if (negate & NEG_MULTIPLY) | ||
905 | vsp.sign = vfp_sign_negate(vsp.sign); | ||
906 | |||
907 | v = vfp_get_float(sd); | ||
908 | pr_debug("VFP: s%u = %08x\n", sd, v); | ||
909 | vfp_single_unpack(&vsn, v); | ||
910 | if (negate & NEG_SUBTRACT) | ||
911 | vsn.sign = vfp_sign_negate(vsn.sign); | ||
912 | |||
913 | exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); | ||
914 | |||
915 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func); | ||
916 | } | ||
917 | |||
918 | /* | ||
919 | * Standard operations | ||
920 | */ | ||
921 | |||
922 | /* | ||
923 | * sd = sd + (sn * sm) | ||
924 | */ | ||
925 | static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) | ||
926 | { | ||
927 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); | ||
928 | } | ||
929 | |||
930 | /* | ||
931 | * sd = sd - (sn * sm) | ||
932 | */ | ||
933 | static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) | ||
934 | { | ||
935 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); | ||
936 | } | ||
937 | |||
938 | /* | ||
939 | * sd = -sd + (sn * sm) | ||
940 | */ | ||
941 | static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) | ||
942 | { | ||
943 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); | ||
944 | } | ||
945 | |||
946 | /* | ||
947 | * sd = -sd - (sn * sm) | ||
948 | */ | ||
949 | static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) | ||
950 | { | ||
951 | return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); | ||
952 | } | ||
953 | |||
954 | /* | ||
955 | * sd = sn * sm | ||
956 | */ | ||
957 | static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) | ||
958 | { | ||
959 | struct vfp_single vsd, vsn, vsm; | ||
960 | u32 exceptions; | ||
961 | s32 n = vfp_get_float(sn); | ||
962 | |||
963 | pr_debug("VFP: s%u = %08x\n", sn, n); | ||
964 | |||
965 | vfp_single_unpack(&vsn, n); | ||
966 | if (vsn.exponent == 0 && vsn.significand) | ||
967 | vfp_single_normalise_denormal(&vsn); | ||
968 | |||
969 | vfp_single_unpack(&vsm, m); | ||
970 | if (vsm.exponent == 0 && vsm.significand) | ||
971 | vfp_single_normalise_denormal(&vsm); | ||
972 | |||
973 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); | ||
974 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); | ||
975 | } | ||
976 | |||
977 | /* | ||
978 | * sd = -(sn * sm) | ||
979 | */ | ||
980 | static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) | ||
981 | { | ||
982 | struct vfp_single vsd, vsn, vsm; | ||
983 | u32 exceptions; | ||
984 | s32 n = vfp_get_float(sn); | ||
985 | |||
986 | pr_debug("VFP: s%u = %08x\n", sn, n); | ||
987 | |||
988 | vfp_single_unpack(&vsn, n); | ||
989 | if (vsn.exponent == 0 && vsn.significand) | ||
990 | vfp_single_normalise_denormal(&vsn); | ||
991 | |||
992 | vfp_single_unpack(&vsm, m); | ||
993 | if (vsm.exponent == 0 && vsm.significand) | ||
994 | vfp_single_normalise_denormal(&vsm); | ||
995 | |||
996 | exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); | ||
997 | vsd.sign = vfp_sign_negate(vsd.sign); | ||
998 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); | ||
999 | } | ||
1000 | |||
1001 | /* | ||
1002 | * sd = sn + sm | ||
1003 | */ | ||
1004 | static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) | ||
1005 | { | ||
1006 | struct vfp_single vsd, vsn, vsm; | ||
1007 | u32 exceptions; | ||
1008 | s32 n = vfp_get_float(sn); | ||
1009 | |||
1010 | pr_debug("VFP: s%u = %08x\n", sn, n); | ||
1011 | |||
1012 | /* | ||
1013 | * Unpack and normalise denormals. | ||
1014 | */ | ||
1015 | vfp_single_unpack(&vsn, n); | ||
1016 | if (vsn.exponent == 0 && vsn.significand) | ||
1017 | vfp_single_normalise_denormal(&vsn); | ||
1018 | |||
1019 | vfp_single_unpack(&vsm, m); | ||
1020 | if (vsm.exponent == 0 && vsm.significand) | ||
1021 | vfp_single_normalise_denormal(&vsm); | ||
1022 | |||
1023 | exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); | ||
1024 | |||
1025 | return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); | ||
1026 | } | ||
1027 | |||
1028 | /* | ||
1029 | * sd = sn - sm | ||
1030 | */ | ||
1031 | static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) | ||
1032 | { | ||
1033 | /* | ||
1034 | * Subtraction is addition with one sign inverted. | ||
1035 | */ | ||
1036 | return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); | ||
1037 | } | ||
1038 | |||
1039 | /* | ||
1040 | * sd = sn / sm | ||
1041 | */ | ||
1042 | static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) | ||
1043 | { | ||
1044 | struct vfp_single vsd, vsn, vsm; | ||
1045 | u32 exceptions = 0; | ||
1046 | s32 n = vfp_get_float(sn); | ||
1047 | int tm, tn; | ||
1048 | |||
1049 | pr_debug("VFP: s%u = %08x\n", sn, n); | ||
1050 | |||
1051 | vfp_single_unpack(&vsn, n); | ||
1052 | vfp_single_unpack(&vsm, m); | ||
1053 | |||
1054 | vsd.sign = vsn.sign ^ vsm.sign; | ||
1055 | |||
1056 | tn = vfp_single_type(&vsn); | ||
1057 | tm = vfp_single_type(&vsm); | ||
1058 | |||
1059 | /* | ||
1060 | * Is n a NAN? | ||
1061 | */ | ||
1062 | if (tn & VFP_NAN) | ||
1063 | goto vsn_nan; | ||
1064 | |||
1065 | /* | ||
1066 | * Is m a NAN? | ||
1067 | */ | ||
1068 | if (tm & VFP_NAN) | ||
1069 | goto vsm_nan; | ||
1070 | |||
1071 | /* | ||
1072 | * If n and m are infinity, the result is invalid | ||
1073 | * If n and m are zero, the result is invalid | ||
1074 | */ | ||
1075 | if (tm & tn & (VFP_INFINITY|VFP_ZERO)) | ||
1076 | goto invalid; | ||
1077 | |||
1078 | /* | ||
1079 | * If n is infinity, the result is infinity | ||
1080 | */ | ||
1081 | if (tn & VFP_INFINITY) | ||
1082 | goto infinity; | ||
1083 | |||
1084 | /* | ||
1085 | * If m is zero, raise div0 exception | ||
1086 | */ | ||
1087 | if (tm & VFP_ZERO) | ||
1088 | goto divzero; | ||
1089 | |||
1090 | /* | ||
1091 | * If m is infinity, or n is zero, the result is zero | ||
1092 | */ | ||
1093 | if (tm & VFP_INFINITY || tn & VFP_ZERO) | ||
1094 | goto zero; | ||
1095 | |||
1096 | if (tn & VFP_DENORMAL) | ||
1097 | vfp_single_normalise_denormal(&vsn); | ||
1098 | if (tm & VFP_DENORMAL) | ||
1099 | vfp_single_normalise_denormal(&vsm); | ||
1100 | |||
1101 | /* | ||
1102 | * Ok, we have two numbers, we can perform division. | ||
1103 | */ | ||
1104 | vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; | ||
1105 | vsm.significand <<= 1; | ||
1106 | if (vsm.significand <= (2 * vsn.significand)) { | ||
1107 | vsn.significand >>= 1; | ||
1108 | vsd.exponent++; | ||
1109 | } | ||
1110 | vsd.significand = ((u64)vsn.significand << 32) / vsm.significand; | ||
1111 | if ((vsd.significand & 0x3f) == 0) | ||
1112 | vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); | ||
1113 | |||
1114 | return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); | ||
1115 | |||
1116 | vsn_nan: | ||
1117 | exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); | ||
1118 | pack: | ||
1119 | vfp_put_float(sd, vfp_single_pack(&vsd)); | ||
1120 | return exceptions; | ||
1121 | |||
1122 | vsm_nan: | ||
1123 | exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); | ||
1124 | goto pack; | ||
1125 | |||
1126 | zero: | ||
1127 | vsd.exponent = 0; | ||
1128 | vsd.significand = 0; | ||
1129 | goto pack; | ||
1130 | |||
1131 | divzero: | ||
1132 | exceptions = FPSCR_DZC; | ||
1133 | infinity: | ||
1134 | vsd.exponent = 255; | ||
1135 | vsd.significand = 0; | ||
1136 | goto pack; | ||
1137 | |||
1138 | invalid: | ||
1139 | vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan)); | ||
1140 | return FPSCR_IOC; | ||
1141 | } | ||
1142 | |||
1143 | static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = { | ||
1144 | [FOP_TO_IDX(FOP_FMAC)] = vfp_single_fmac, | ||
1145 | [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac, | ||
1146 | [FOP_TO_IDX(FOP_FMSC)] = vfp_single_fmsc, | ||
1147 | [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc, | ||
1148 | [FOP_TO_IDX(FOP_FMUL)] = vfp_single_fmul, | ||
1149 | [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul, | ||
1150 | [FOP_TO_IDX(FOP_FADD)] = vfp_single_fadd, | ||
1151 | [FOP_TO_IDX(FOP_FSUB)] = vfp_single_fsub, | ||
1152 | [FOP_TO_IDX(FOP_FDIV)] = vfp_single_fdiv, | ||
1153 | }; | ||
1154 | |||
1155 | #define FREG_BANK(x) ((x) & 0x18) | ||
1156 | #define FREG_IDX(x) ((x) & 7) | ||
1157 | |||
1158 | u32 vfp_single_cpdo(u32 inst, u32 fpscr) | ||
1159 | { | ||
1160 | u32 op = inst & FOP_MASK; | ||
1161 | u32 exceptions = 0; | ||
1162 | unsigned int sd = vfp_get_sd(inst); | ||
1163 | unsigned int sn = vfp_get_sn(inst); | ||
1164 | unsigned int sm = vfp_get_sm(inst); | ||
1165 | unsigned int vecitr, veclen, vecstride; | ||
1166 | u32 (*fop)(int, int, s32, u32); | ||
1167 | |||
1168 | veclen = fpscr & FPSCR_LENGTH_MASK; | ||
1169 | vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK); | ||
1170 | |||
1171 | /* | ||
1172 | * If destination bank is zero, vector length is always '1'. | ||
1173 | * ARM DDI0100F C5.1.3, C5.3.2. | ||
1174 | */ | ||
1175 | if (FREG_BANK(sd) == 0) | ||
1176 | veclen = 0; | ||
1177 | |||
1178 | pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, | ||
1179 | (veclen >> FPSCR_LENGTH_BIT) + 1); | ||
1180 | |||
1181 | fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)]; | ||
1182 | if (!fop) | ||
1183 | goto invalid; | ||
1184 | |||
1185 | for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { | ||
1186 | s32 m = vfp_get_float(sm); | ||
1187 | u32 except; | ||
1188 | |||
1189 | if (op == FOP_EXT) | ||
1190 | pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n", | ||
1191 | vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m); | ||
1192 | else | ||
1193 | pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n", | ||
1194 | vecitr >> FPSCR_LENGTH_BIT, sd, sn, | ||
1195 | FOP_TO_IDX(op), sm, m); | ||
1196 | |||
1197 | except = fop(sd, sn, m, fpscr); | ||
1198 | pr_debug("VFP: itr%d: exceptions=%08x\n", | ||
1199 | vecitr >> FPSCR_LENGTH_BIT, except); | ||
1200 | |||
1201 | exceptions |= except; | ||
1202 | |||
1203 | /* | ||
1204 | * This ensures that comparisons only operate on scalars; | ||
1205 | * comparisons always return with one FPSCR status bit set. | ||
1206 | */ | ||
1207 | if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | ||
1208 | break; | ||
1209 | |||
1210 | /* | ||
1211 | * CHECK: It appears to be undefined whether we stop when | ||
1212 | * we encounter an exception. We continue. | ||
1213 | */ | ||
1214 | |||
1215 | sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7); | ||
1216 | sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7); | ||
1217 | if (FREG_BANK(sm) != 0) | ||
1218 | sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7); | ||
1219 | } | ||
1220 | return exceptions; | ||
1221 | |||
1222 | invalid: | ||
1223 | return (u32)-1; | ||
1224 | } | ||