diff options
author | Paul Mundt <lethal@linux-sh.org> | 2007-11-26 06:38:36 -0500 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2008-01-27 23:18:57 -0500 |
commit | 74d99a5e262229ee865f6f68528d10b82471ead6 (patch) | |
tree | 1e2382c8779e1ee06226f9cc9acbf9a63eb5fc44 /arch | |
parent | a8f67f4b4d4b74cd14d3540ade8657ebee543340 (diff) |
sh: SH-2A FPU support.
Signed-off-by: Kieran Bingham <kbingham@mpc-data.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sh/Kconfig | 2 | ||||
-rw-r--r-- | arch/sh/Makefile | 4 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh2/entry.S | 17 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh2a/Makefile | 2 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh2a/fpu.c | 633 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh3/ex.S | 2 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/fpu.c | 23 | ||||
-rw-r--r-- | arch/sh/kernel/traps_32.c | 13 |
8 files changed, 667 insertions, 29 deletions
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index c18a5512ac82..2dc3b177193c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
@@ -161,6 +161,7 @@ config CPU_SUBTYPE_SH7619 | |||
161 | config CPU_SUBTYPE_SH7203 | 161 | config CPU_SUBTYPE_SH7203 |
162 | bool "Support SH7203 processor" | 162 | bool "Support SH7203 processor" |
163 | select CPU_SH2A | 163 | select CPU_SH2A |
164 | select CPU_HAS_FPU | ||
164 | 165 | ||
165 | config CPU_SUBTYPE_SH7206 | 166 | config CPU_SUBTYPE_SH7206 |
166 | bool "Support SH7206 processor" | 167 | bool "Support SH7206 processor" |
@@ -169,6 +170,7 @@ config CPU_SUBTYPE_SH7206 | |||
169 | config CPU_SUBTYPE_SH7263 | 170 | config CPU_SUBTYPE_SH7263 |
170 | bool "Support SH7263 processor" | 171 | bool "Support SH7263 processor" |
171 | select CPU_SH2A | 172 | select CPU_SH2A |
173 | select CPU_HAS_FPU | ||
172 | 174 | ||
173 | # SH-3 Processor Support | 175 | # SH-3 Processor Support |
174 | 176 | ||
diff --git a/arch/sh/Makefile b/arch/sh/Makefile index f7cbc13fd2a0..292d8618248d 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile | |||
@@ -20,10 +20,6 @@ isa-$(CONFIG_CPU_SH4AL_DSP) := sh4al | |||
20 | isa-$(CONFIG_CPU_SH5) := shmedia | 20 | isa-$(CONFIG_CPU_SH5) := shmedia |
21 | isa-$(CONFIG_SH_DSP) := $(isa-y)-dsp | 21 | isa-$(CONFIG_SH_DSP) := $(isa-y)-dsp |
22 | 22 | ||
23 | ifndef CONFIG_MMU | ||
24 | isa-y := $(isa-y)-nommu | ||
25 | endif | ||
26 | |||
27 | ifndef CONFIG_SH_DSP | 23 | ifndef CONFIG_SH_DSP |
28 | ifndef CONFIG_SH_FPU | 24 | ifndef CONFIG_SH_FPU |
29 | isa-y := $(isa-y)-nofpu | 25 | isa-y := $(isa-y)-nofpu |
diff --git a/arch/sh/kernel/cpu/sh2/entry.S b/arch/sh/kernel/cpu/sh2/entry.S index 4ff2334b4a38..7a26569e7956 100644 --- a/arch/sh/kernel/cpu/sh2/entry.S +++ b/arch/sh/kernel/cpu/sh2/entry.S | |||
@@ -149,6 +149,14 @@ ENTRY(exception_handler) | |||
149 | mov #32,r8 | 149 | mov #32,r8 |
150 | cmp/hs r8,r9 | 150 | cmp/hs r8,r9 |
151 | bt trap_entry ! 64 > vec >= 32 is trap | 151 | bt trap_entry ! 64 > vec >= 32 is trap |
152 | |||
153 | #if defined(CONFIG_SH_FPU) | ||
154 | mov #13,r8 | ||
155 | cmp/eq r8,r9 | ||
156 | bt 10f ! fpu | ||
157 | nop | ||
158 | #endif | ||
159 | |||
152 | mov.l 4f,r8 | 160 | mov.l 4f,r8 |
153 | mov r9,r4 | 161 | mov r9,r4 |
154 | shll2 r9 | 162 | shll2 r9 |
@@ -158,6 +166,10 @@ ENTRY(exception_handler) | |||
158 | cmp/eq r9,r8 | 166 | cmp/eq r9,r8 |
159 | bf 3f | 167 | bf 3f |
160 | mov.l 8f,r8 ! unhandled exception | 168 | mov.l 8f,r8 ! unhandled exception |
169 | #if defined(CONFIG_SH_FPU) | ||
170 | 10: | ||
171 | mov.l 9f, r8 ! unhandled exception | ||
172 | #endif | ||
161 | 3: | 173 | 3: |
162 | mov.l 5f,r10 | 174 | mov.l 5f,r10 |
163 | jmp @r8 | 175 | jmp @r8 |
@@ -177,7 +189,10 @@ interrupt_entry: | |||
177 | 6: .long ret_from_irq | 189 | 6: .long ret_from_irq |
178 | 7: .long do_IRQ | 190 | 7: .long do_IRQ |
179 | 8: .long do_exception_error | 191 | 8: .long do_exception_error |
180 | 192 | #ifdef CONFIG_SH_FPU | |
193 | 9: .long fpu_error_trap_handler | ||
194 | #endif | ||
195 | |||
181 | trap_entry: | 196 | trap_entry: |
182 | mov #0x30,r8 | 197 | mov #0x30,r8 |
183 | cmp/ge r8,r9 ! vector 0x20-0x2f is systemcall | 198 | cmp/ge r8,r9 ! vector 0x20-0x2f is systemcall |
diff --git a/arch/sh/kernel/cpu/sh2a/Makefile b/arch/sh/kernel/cpu/sh2a/Makefile index 50e4d0ffdd6f..b279cdc3a233 100644 --- a/arch/sh/kernel/cpu/sh2a/Makefile +++ b/arch/sh/kernel/cpu/sh2a/Makefile | |||
@@ -6,6 +6,8 @@ obj-y := common.o probe.o opcode_helper.o | |||
6 | 6 | ||
7 | common-y += $(addprefix ../sh2/, ex.o entry.o) | 7 | common-y += $(addprefix ../sh2/, ex.o entry.o) |
8 | 8 | ||
9 | obj-$(CONFIG_SH_FPU) += fpu.o | ||
10 | |||
9 | obj-$(CONFIG_CPU_SUBTYPE_SH7206) += setup-sh7206.o clock-sh7206.o | 11 | obj-$(CONFIG_CPU_SUBTYPE_SH7206) += setup-sh7206.o clock-sh7206.o |
10 | obj-$(CONFIG_CPU_SUBTYPE_SH7203) += setup-sh7203.o clock-sh7203.o | 12 | obj-$(CONFIG_CPU_SUBTYPE_SH7203) += setup-sh7203.o clock-sh7203.o |
11 | obj-$(CONFIG_CPU_SUBTYPE_SH7263) += setup-sh7203.o clock-sh7203.o | 13 | obj-$(CONFIG_CPU_SUBTYPE_SH7263) += setup-sh7203.o clock-sh7203.o |
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c new file mode 100644 index 000000000000..ff99562456fb --- /dev/null +++ b/arch/sh/kernel/cpu/sh2a/fpu.c | |||
@@ -0,0 +1,633 @@ | |||
1 | /* | ||
2 | * Save/restore floating point context for signal handlers. | ||
3 | * | ||
4 | * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka | ||
5 | * | ||
6 | * This file is subject to the terms and conditions of the GNU General Public | ||
7 | * License. See the file "COPYING" in the main directory of this archive | ||
8 | * for more details. | ||
9 | * | ||
10 | * FIXME! These routines can be optimized in big endian case. | ||
11 | */ | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/signal.h> | ||
14 | #include <asm/processor.h> | ||
15 | #include <asm/io.h> | ||
16 | |||
17 | /* The PR (precision) bit in the FP Status Register must be clear when | ||
18 | * an frchg instruction is executed, otherwise the instruction is undefined. | ||
19 | * Executing frchg with PR set causes a trap on some SH4 implementations. | ||
20 | */ | ||
21 | |||
22 | #define FPSCR_RCHG 0x00000000 | ||
23 | |||
24 | |||
25 | /* | ||
26 | * Save FPU registers onto task structure. | ||
27 | * Assume called with FPU enabled (SR.FD=0). | ||
28 | */ | ||
29 | void | ||
30 | save_fpu(struct task_struct *tsk, struct pt_regs *regs) | ||
31 | { | ||
32 | unsigned long dummy; | ||
33 | |||
34 | clear_tsk_thread_flag(tsk, TIF_USEDFPU); | ||
35 | enable_fpu(); | ||
36 | asm volatile("sts.l fpul, @-%0\n\t" | ||
37 | "sts.l fpscr, @-%0\n\t" | ||
38 | "fmov.s fr15, @-%0\n\t" | ||
39 | "fmov.s fr14, @-%0\n\t" | ||
40 | "fmov.s fr13, @-%0\n\t" | ||
41 | "fmov.s fr12, @-%0\n\t" | ||
42 | "fmov.s fr11, @-%0\n\t" | ||
43 | "fmov.s fr10, @-%0\n\t" | ||
44 | "fmov.s fr9, @-%0\n\t" | ||
45 | "fmov.s fr8, @-%0\n\t" | ||
46 | "fmov.s fr7, @-%0\n\t" | ||
47 | "fmov.s fr6, @-%0\n\t" | ||
48 | "fmov.s fr5, @-%0\n\t" | ||
49 | "fmov.s fr4, @-%0\n\t" | ||
50 | "fmov.s fr3, @-%0\n\t" | ||
51 | "fmov.s fr2, @-%0\n\t" | ||
52 | "fmov.s fr1, @-%0\n\t" | ||
53 | "fmov.s fr0, @-%0\n\t" | ||
54 | "lds %3, fpscr\n\t" | ||
55 | : "=r" (dummy) | ||
56 | : "0" ((char *)(&tsk->thread.fpu.hard.status)), | ||
57 | "r" (FPSCR_RCHG), | ||
58 | "r" (FPSCR_INIT) | ||
59 | : "memory"); | ||
60 | |||
61 | disable_fpu(); | ||
62 | release_fpu(regs); | ||
63 | } | ||
64 | |||
65 | static void | ||
66 | restore_fpu(struct task_struct *tsk) | ||
67 | { | ||
68 | unsigned long dummy; | ||
69 | |||
70 | enable_fpu(); | ||
71 | asm volatile("fmov.s @%0+, fr0\n\t" | ||
72 | "fmov.s @%0+, fr1\n\t" | ||
73 | "fmov.s @%0+, fr2\n\t" | ||
74 | "fmov.s @%0+, fr3\n\t" | ||
75 | "fmov.s @%0+, fr4\n\t" | ||
76 | "fmov.s @%0+, fr5\n\t" | ||
77 | "fmov.s @%0+, fr6\n\t" | ||
78 | "fmov.s @%0+, fr7\n\t" | ||
79 | "fmov.s @%0+, fr8\n\t" | ||
80 | "fmov.s @%0+, fr9\n\t" | ||
81 | "fmov.s @%0+, fr10\n\t" | ||
82 | "fmov.s @%0+, fr11\n\t" | ||
83 | "fmov.s @%0+, fr12\n\t" | ||
84 | "fmov.s @%0+, fr13\n\t" | ||
85 | "fmov.s @%0+, fr14\n\t" | ||
86 | "fmov.s @%0+, fr15\n\t" | ||
87 | "lds.l @%0+, fpscr\n\t" | ||
88 | "lds.l @%0+, fpul\n\t" | ||
89 | : "=r" (dummy) | ||
90 | : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG) | ||
91 | : "memory"); | ||
92 | disable_fpu(); | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Load the FPU with signalling NANS. This bit pattern we're using | ||
97 | * has the property that, whether it is considered as single or as | ||
98 | * double precision, it represents signaling NANS. | ||
99 | */ | ||
100 | |||
101 | static void | ||
102 | fpu_init(void) | ||
103 | { | ||
104 | enable_fpu(); | ||
105 | asm volatile("lds %0, fpul\n\t" | ||
106 | "fsts fpul, fr0\n\t" | ||
107 | "fsts fpul, fr1\n\t" | ||
108 | "fsts fpul, fr2\n\t" | ||
109 | "fsts fpul, fr3\n\t" | ||
110 | "fsts fpul, fr4\n\t" | ||
111 | "fsts fpul, fr5\n\t" | ||
112 | "fsts fpul, fr6\n\t" | ||
113 | "fsts fpul, fr7\n\t" | ||
114 | "fsts fpul, fr8\n\t" | ||
115 | "fsts fpul, fr9\n\t" | ||
116 | "fsts fpul, fr10\n\t" | ||
117 | "fsts fpul, fr11\n\t" | ||
118 | "fsts fpul, fr12\n\t" | ||
119 | "fsts fpul, fr13\n\t" | ||
120 | "fsts fpul, fr14\n\t" | ||
121 | "fsts fpul, fr15\n\t" | ||
122 | "lds %2, fpscr\n\t" | ||
123 | : /* no output */ | ||
124 | : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); | ||
125 | disable_fpu(); | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * Emulate arithmetic ops on denormalized number for some FPU insns. | ||
130 | */ | ||
131 | |||
132 | /* denormalized float * float */ | ||
133 | static int denormal_mulf(int hx, int hy) | ||
134 | { | ||
135 | unsigned int ix, iy; | ||
136 | unsigned long long m, n; | ||
137 | int exp, w; | ||
138 | |||
139 | ix = hx & 0x7fffffff; | ||
140 | iy = hy & 0x7fffffff; | ||
141 | if (iy < 0x00800000 || ix == 0) | ||
142 | return ((hx ^ hy) & 0x80000000); | ||
143 | |||
144 | exp = (iy & 0x7f800000) >> 23; | ||
145 | ix &= 0x007fffff; | ||
146 | iy = (iy & 0x007fffff) | 0x00800000; | ||
147 | m = (unsigned long long)ix * iy; | ||
148 | n = m; | ||
149 | w = -1; | ||
150 | while (n) { n >>= 1; w++; } | ||
151 | |||
152 | /* FIXME: use guard bits */ | ||
153 | exp += w - 126 - 46; | ||
154 | if (exp > 0) | ||
155 | ix = ((int) (m >> (w - 23)) & 0x007fffff) | (exp << 23); | ||
156 | else if (exp + 22 >= 0) | ||
157 | ix = (int) (m >> (w - 22 - exp)) & 0x007fffff; | ||
158 | else | ||
159 | ix = 0; | ||
160 | |||
161 | ix |= (hx ^ hy) & 0x80000000; | ||
162 | return ix; | ||
163 | } | ||
164 | |||
165 | /* denormalized double * double */ | ||
166 | static void mult64(unsigned long long x, unsigned long long y, | ||
167 | unsigned long long *highp, unsigned long long *lowp) | ||
168 | { | ||
169 | unsigned long long sub0, sub1, sub2, sub3; | ||
170 | unsigned long long high, low; | ||
171 | |||
172 | sub0 = (x >> 32) * (unsigned long) (y >> 32); | ||
173 | sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32); | ||
174 | sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL); | ||
175 | sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL); | ||
176 | low = sub3; | ||
177 | high = 0LL; | ||
178 | sub3 += (sub1 << 32); | ||
179 | if (low > sub3) | ||
180 | high++; | ||
181 | low = sub3; | ||
182 | sub3 += (sub2 << 32); | ||
183 | if (low > sub3) | ||
184 | high++; | ||
185 | low = sub3; | ||
186 | high += (sub1 >> 32) + (sub2 >> 32); | ||
187 | high += sub0; | ||
188 | *lowp = low; | ||
189 | *highp = high; | ||
190 | } | ||
191 | |||
192 | static inline long long rshift64(unsigned long long mh, | ||
193 | unsigned long long ml, int n) | ||
194 | { | ||
195 | if (n >= 64) | ||
196 | return mh >> (n - 64); | ||
197 | return (mh << (64 - n)) | (ml >> n); | ||
198 | } | ||
199 | |||
200 | static long long denormal_muld(long long hx, long long hy) | ||
201 | { | ||
202 | unsigned long long ix, iy; | ||
203 | unsigned long long mh, ml, nh, nl; | ||
204 | int exp, w; | ||
205 | |||
206 | ix = hx & 0x7fffffffffffffffLL; | ||
207 | iy = hy & 0x7fffffffffffffffLL; | ||
208 | if (iy < 0x0010000000000000LL || ix == 0) | ||
209 | return ((hx ^ hy) & 0x8000000000000000LL); | ||
210 | |||
211 | exp = (iy & 0x7ff0000000000000LL) >> 52; | ||
212 | ix &= 0x000fffffffffffffLL; | ||
213 | iy = (iy & 0x000fffffffffffffLL) | 0x0010000000000000LL; | ||
214 | mult64(ix, iy, &mh, &ml); | ||
215 | nh = mh; | ||
216 | nl = ml; | ||
217 | w = -1; | ||
218 | if (nh) { | ||
219 | while (nh) { nh >>= 1; w++;} | ||
220 | w += 64; | ||
221 | } else | ||
222 | while (nl) { nl >>= 1; w++;} | ||
223 | |||
224 | /* FIXME: use guard bits */ | ||
225 | exp += w - 1022 - 52 * 2; | ||
226 | if (exp > 0) | ||
227 | ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL) | ||
228 | | ((long long)exp << 52); | ||
229 | else if (exp + 51 >= 0) | ||
230 | ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL; | ||
231 | else | ||
232 | ix = 0; | ||
233 | |||
234 | ix |= (hx ^ hy) & 0x8000000000000000LL; | ||
235 | return ix; | ||
236 | } | ||
237 | |||
238 | /* ix - iy where iy: denormal and ix, iy >= 0 */ | ||
239 | static int denormal_subf1(unsigned int ix, unsigned int iy) | ||
240 | { | ||
241 | int frac; | ||
242 | int exp; | ||
243 | |||
244 | if (ix < 0x00800000) | ||
245 | return ix - iy; | ||
246 | |||
247 | exp = (ix & 0x7f800000) >> 23; | ||
248 | if (exp - 1 > 31) | ||
249 | return ix; | ||
250 | iy >>= exp - 1; | ||
251 | if (iy == 0) | ||
252 | return ix; | ||
253 | |||
254 | frac = (ix & 0x007fffff) | 0x00800000; | ||
255 | frac -= iy; | ||
256 | while (frac < 0x00800000) { | ||
257 | if (--exp == 0) | ||
258 | return frac; | ||
259 | frac <<= 1; | ||
260 | } | ||
261 | |||
262 | return (exp << 23) | (frac & 0x007fffff); | ||
263 | } | ||
264 | |||
265 | /* ix + iy where iy: denormal and ix, iy >= 0 */ | ||
266 | static int denormal_addf1(unsigned int ix, unsigned int iy) | ||
267 | { | ||
268 | int frac; | ||
269 | int exp; | ||
270 | |||
271 | if (ix < 0x00800000) | ||
272 | return ix + iy; | ||
273 | |||
274 | exp = (ix & 0x7f800000) >> 23; | ||
275 | if (exp - 1 > 31) | ||
276 | return ix; | ||
277 | iy >>= exp - 1; | ||
278 | if (iy == 0) | ||
279 | return ix; | ||
280 | |||
281 | frac = (ix & 0x007fffff) | 0x00800000; | ||
282 | frac += iy; | ||
283 | if (frac >= 0x01000000) { | ||
284 | frac >>= 1; | ||
285 | ++exp; | ||
286 | } | ||
287 | |||
288 | return (exp << 23) | (frac & 0x007fffff); | ||
289 | } | ||
290 | |||
291 | static int denormal_addf(int hx, int hy) | ||
292 | { | ||
293 | unsigned int ix, iy; | ||
294 | int sign; | ||
295 | |||
296 | if ((hx ^ hy) & 0x80000000) { | ||
297 | sign = hx & 0x80000000; | ||
298 | ix = hx & 0x7fffffff; | ||
299 | iy = hy & 0x7fffffff; | ||
300 | if (iy < 0x00800000) { | ||
301 | ix = denormal_subf1(ix, iy); | ||
302 | if (ix < 0) { | ||
303 | ix = -ix; | ||
304 | sign ^= 0x80000000; | ||
305 | } | ||
306 | } else { | ||
307 | ix = denormal_subf1(iy, ix); | ||
308 | sign ^= 0x80000000; | ||
309 | } | ||
310 | } else { | ||
311 | sign = hx & 0x80000000; | ||
312 | ix = hx & 0x7fffffff; | ||
313 | iy = hy & 0x7fffffff; | ||
314 | if (iy < 0x00800000) | ||
315 | ix = denormal_addf1(ix, iy); | ||
316 | else | ||
317 | ix = denormal_addf1(iy, ix); | ||
318 | } | ||
319 | |||
320 | return sign | ix; | ||
321 | } | ||
322 | |||
323 | /* ix - iy where iy: denormal and ix, iy >= 0 */ | ||
324 | static long long denormal_subd1(unsigned long long ix, unsigned long long iy) | ||
325 | { | ||
326 | long long frac; | ||
327 | int exp; | ||
328 | |||
329 | if (ix < 0x0010000000000000LL) | ||
330 | return ix - iy; | ||
331 | |||
332 | exp = (ix & 0x7ff0000000000000LL) >> 52; | ||
333 | if (exp - 1 > 63) | ||
334 | return ix; | ||
335 | iy >>= exp - 1; | ||
336 | if (iy == 0) | ||
337 | return ix; | ||
338 | |||
339 | frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL; | ||
340 | frac -= iy; | ||
341 | while (frac < 0x0010000000000000LL) { | ||
342 | if (--exp == 0) | ||
343 | return frac; | ||
344 | frac <<= 1; | ||
345 | } | ||
346 | |||
347 | return ((long long)exp << 52) | (frac & 0x000fffffffffffffLL); | ||
348 | } | ||
349 | |||
350 | /* ix + iy where iy: denormal and ix, iy >= 0 */ | ||
351 | static long long denormal_addd1(unsigned long long ix, unsigned long long iy) | ||
352 | { | ||
353 | long long frac; | ||
354 | long long exp; | ||
355 | |||
356 | if (ix < 0x0010000000000000LL) | ||
357 | return ix + iy; | ||
358 | |||
359 | exp = (ix & 0x7ff0000000000000LL) >> 52; | ||
360 | if (exp - 1 > 63) | ||
361 | return ix; | ||
362 | iy >>= exp - 1; | ||
363 | if (iy == 0) | ||
364 | return ix; | ||
365 | |||
366 | frac = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL; | ||
367 | frac += iy; | ||
368 | if (frac >= 0x0020000000000000LL) { | ||
369 | frac >>= 1; | ||
370 | ++exp; | ||
371 | } | ||
372 | |||
373 | return (exp << 52) | (frac & 0x000fffffffffffffLL); | ||
374 | } | ||
375 | |||
376 | static long long denormal_addd(long long hx, long long hy) | ||
377 | { | ||
378 | unsigned long long ix, iy; | ||
379 | long long sign; | ||
380 | |||
381 | if ((hx ^ hy) & 0x8000000000000000LL) { | ||
382 | sign = hx & 0x8000000000000000LL; | ||
383 | ix = hx & 0x7fffffffffffffffLL; | ||
384 | iy = hy & 0x7fffffffffffffffLL; | ||
385 | if (iy < 0x0010000000000000LL) { | ||
386 | ix = denormal_subd1(ix, iy); | ||
387 | if (ix < 0) { | ||
388 | ix = -ix; | ||
389 | sign ^= 0x8000000000000000LL; | ||
390 | } | ||
391 | } else { | ||
392 | ix = denormal_subd1(iy, ix); | ||
393 | sign ^= 0x8000000000000000LL; | ||
394 | } | ||
395 | } else { | ||
396 | sign = hx & 0x8000000000000000LL; | ||
397 | ix = hx & 0x7fffffffffffffffLL; | ||
398 | iy = hy & 0x7fffffffffffffffLL; | ||
399 | if (iy < 0x0010000000000000LL) | ||
400 | ix = denormal_addd1(ix, iy); | ||
401 | else | ||
402 | ix = denormal_addd1(iy, ix); | ||
403 | } | ||
404 | |||
405 | return sign | ix; | ||
406 | } | ||
407 | |||
408 | /** | ||
409 | * denormal_to_double - Given denormalized float number, | ||
410 | * store double float | ||
411 | * | ||
412 | * @fpu: Pointer to sh_fpu_hard structure | ||
413 | * @n: Index to FP register | ||
414 | */ | ||
415 | static void | ||
416 | denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) | ||
417 | { | ||
418 | unsigned long du, dl; | ||
419 | unsigned long x = fpu->fpul; | ||
420 | int exp = 1023 - 126; | ||
421 | |||
422 | if (x != 0 && (x & 0x7f800000) == 0) { | ||
423 | du = (x & 0x80000000); | ||
424 | while ((x & 0x00800000) == 0) { | ||
425 | x <<= 1; | ||
426 | exp--; | ||
427 | } | ||
428 | x &= 0x007fffff; | ||
429 | du |= (exp << 20) | (x >> 3); | ||
430 | dl = x << 29; | ||
431 | |||
432 | fpu->fp_regs[n] = du; | ||
433 | fpu->fp_regs[n+1] = dl; | ||
434 | } | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * ieee_fpe_handler - Handle denormalized number exception | ||
439 | * | ||
440 | * @regs: Pointer to register structure | ||
441 | * | ||
442 | * Returns 1 when it's handled (should not cause exception). | ||
443 | */ | ||
444 | static int | ||
445 | ieee_fpe_handler (struct pt_regs *regs) | ||
446 | { | ||
447 | unsigned short insn = *(unsigned short *) regs->pc; | ||
448 | unsigned short finsn; | ||
449 | unsigned long nextpc; | ||
450 | int nib[4] = { | ||
451 | (insn >> 12) & 0xf, | ||
452 | (insn >> 8) & 0xf, | ||
453 | (insn >> 4) & 0xf, | ||
454 | insn & 0xf}; | ||
455 | |||
456 | if (nib[0] == 0xb || | ||
457 | (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ | ||
458 | regs->pr = regs->pc + 4; | ||
459 | if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ | ||
460 | nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); | ||
461 | finsn = *(unsigned short *) (regs->pc + 2); | ||
462 | } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ | ||
463 | if (regs->sr & 1) | ||
464 | nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); | ||
465 | else | ||
466 | nextpc = regs->pc + 4; | ||
467 | finsn = *(unsigned short *) (regs->pc + 2); | ||
468 | } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ | ||
469 | if (regs->sr & 1) | ||
470 | nextpc = regs->pc + 4; | ||
471 | else | ||
472 | nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); | ||
473 | finsn = *(unsigned short *) (regs->pc + 2); | ||
474 | } else if (nib[0] == 0x4 && nib[3] == 0xb && | ||
475 | (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ | ||
476 | nextpc = regs->regs[nib[1]]; | ||
477 | finsn = *(unsigned short *) (regs->pc + 2); | ||
478 | } else if (nib[0] == 0x0 && nib[3] == 0x3 && | ||
479 | (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ | ||
480 | nextpc = regs->pc + 4 + regs->regs[nib[1]]; | ||
481 | finsn = *(unsigned short *) (regs->pc + 2); | ||
482 | } else if (insn == 0x000b) { /* rts */ | ||
483 | nextpc = regs->pr; | ||
484 | finsn = *(unsigned short *) (regs->pc + 2); | ||
485 | } else { | ||
486 | nextpc = regs->pc + 2; | ||
487 | finsn = insn; | ||
488 | } | ||
489 | |||
490 | #define FPSCR_FPU_ERROR (1 << 17) | ||
491 | |||
492 | if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ | ||
493 | struct task_struct *tsk = current; | ||
494 | |||
495 | if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) { | ||
496 | /* FPU error */ | ||
497 | denormal_to_double (&tsk->thread.fpu.hard, | ||
498 | (finsn >> 8) & 0xf); | ||
499 | } else | ||
500 | return 0; | ||
501 | |||
502 | regs->pc = nextpc; | ||
503 | return 1; | ||
504 | } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */ | ||
505 | struct task_struct *tsk = current; | ||
506 | int fpscr; | ||
507 | int n, m, prec; | ||
508 | unsigned int hx, hy; | ||
509 | |||
510 | n = (finsn >> 8) & 0xf; | ||
511 | m = (finsn >> 4) & 0xf; | ||
512 | hx = tsk->thread.fpu.hard.fp_regs[n]; | ||
513 | hy = tsk->thread.fpu.hard.fp_regs[m]; | ||
514 | fpscr = tsk->thread.fpu.hard.fpscr; | ||
515 | prec = fpscr & (1 << 19); | ||
516 | |||
517 | if ((fpscr & FPSCR_FPU_ERROR) | ||
518 | && (prec && ((hx & 0x7fffffff) < 0x00100000 | ||
519 | || (hy & 0x7fffffff) < 0x00100000))) { | ||
520 | long long llx, lly; | ||
521 | |||
522 | /* FPU error because of denormal */ | ||
523 | llx = ((long long) hx << 32) | ||
524 | | tsk->thread.fpu.hard.fp_regs[n+1]; | ||
525 | lly = ((long long) hy << 32) | ||
526 | | tsk->thread.fpu.hard.fp_regs[m+1]; | ||
527 | if ((hx & 0x7fffffff) >= 0x00100000) | ||
528 | llx = denormal_muld(lly, llx); | ||
529 | else | ||
530 | llx = denormal_muld(llx, lly); | ||
531 | tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; | ||
532 | tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; | ||
533 | } else if ((fpscr & FPSCR_FPU_ERROR) | ||
534 | && (!prec && ((hx & 0x7fffffff) < 0x00800000 | ||
535 | || (hy & 0x7fffffff) < 0x00800000))) { | ||
536 | /* FPU error because of denormal */ | ||
537 | if ((hx & 0x7fffffff) >= 0x00800000) | ||
538 | hx = denormal_mulf(hy, hx); | ||
539 | else | ||
540 | hx = denormal_mulf(hx, hy); | ||
541 | tsk->thread.fpu.hard.fp_regs[n] = hx; | ||
542 | } else | ||
543 | return 0; | ||
544 | |||
545 | regs->pc = nextpc; | ||
546 | return 1; | ||
547 | } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */ | ||
548 | struct task_struct *tsk = current; | ||
549 | int fpscr; | ||
550 | int n, m, prec; | ||
551 | unsigned int hx, hy; | ||
552 | |||
553 | n = (finsn >> 8) & 0xf; | ||
554 | m = (finsn >> 4) & 0xf; | ||
555 | hx = tsk->thread.fpu.hard.fp_regs[n]; | ||
556 | hy = tsk->thread.fpu.hard.fp_regs[m]; | ||
557 | fpscr = tsk->thread.fpu.hard.fpscr; | ||
558 | prec = fpscr & (1 << 19); | ||
559 | |||
560 | if ((fpscr & FPSCR_FPU_ERROR) | ||
561 | && (prec && ((hx & 0x7fffffff) < 0x00100000 | ||
562 | || (hy & 0x7fffffff) < 0x00100000))) { | ||
563 | long long llx, lly; | ||
564 | |||
565 | /* FPU error because of denormal */ | ||
566 | llx = ((long long) hx << 32) | ||
567 | | tsk->thread.fpu.hard.fp_regs[n+1]; | ||
568 | lly = ((long long) hy << 32) | ||
569 | | tsk->thread.fpu.hard.fp_regs[m+1]; | ||
570 | if ((finsn & 0xf00f) == 0xf000) | ||
571 | llx = denormal_addd(llx, lly); | ||
572 | else | ||
573 | llx = denormal_addd(llx, lly ^ (1LL << 63)); | ||
574 | tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; | ||
575 | tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; | ||
576 | } else if ((fpscr & FPSCR_FPU_ERROR) | ||
577 | && (!prec && ((hx & 0x7fffffff) < 0x00800000 | ||
578 | || (hy & 0x7fffffff) < 0x00800000))) { | ||
579 | /* FPU error because of denormal */ | ||
580 | if ((finsn & 0xf00f) == 0xf000) | ||
581 | hx = denormal_addf(hx, hy); | ||
582 | else | ||
583 | hx = denormal_addf(hx, hy ^ 0x80000000); | ||
584 | tsk->thread.fpu.hard.fp_regs[n] = hx; | ||
585 | } else | ||
586 | return 0; | ||
587 | |||
588 | regs->pc = nextpc; | ||
589 | return 1; | ||
590 | } | ||
591 | |||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | BUILD_TRAP_HANDLER(fpu_error) | ||
596 | { | ||
597 | struct task_struct *tsk = current; | ||
598 | TRAP_HANDLER_DECL; | ||
599 | |||
600 | save_fpu(tsk, regs); | ||
601 | if (ieee_fpe_handler(regs)) { | ||
602 | tsk->thread.fpu.hard.fpscr &= | ||
603 | ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); | ||
604 | grab_fpu(regs); | ||
605 | restore_fpu(tsk); | ||
606 | set_tsk_thread_flag(tsk, TIF_USEDFPU); | ||
607 | return; | ||
608 | } | ||
609 | |||
610 | force_sig(SIGFPE, tsk); | ||
611 | } | ||
612 | |||
613 | BUILD_TRAP_HANDLER(fpu_state_restore) | ||
614 | { | ||
615 | struct task_struct *tsk = current; | ||
616 | TRAP_HANDLER_DECL; | ||
617 | |||
618 | grab_fpu(regs); | ||
619 | if (!user_mode(regs)) { | ||
620 | printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); | ||
621 | return; | ||
622 | } | ||
623 | |||
624 | if (used_math()) { | ||
625 | /* Using the FPU again. */ | ||
626 | restore_fpu(tsk); | ||
627 | } else { | ||
628 | /* First time FPU user. */ | ||
629 | fpu_init(); | ||
630 | set_used_math(); | ||
631 | } | ||
632 | set_tsk_thread_flag(tsk, TIF_USEDFPU); | ||
633 | } | ||
diff --git a/arch/sh/kernel/cpu/sh3/ex.S b/arch/sh/kernel/cpu/sh3/ex.S index b6abf38d3a8d..11b6d9c6edae 100644 --- a/arch/sh/kernel/cpu/sh3/ex.S +++ b/arch/sh/kernel/cpu/sh3/ex.S | |||
@@ -36,7 +36,7 @@ ENTRY(exception_handling_table) | |||
36 | .long exception_error ! address error store /* 100 */ | 36 | .long exception_error ! address error store /* 100 */ |
37 | #endif | 37 | #endif |
38 | #if defined(CONFIG_SH_FPU) | 38 | #if defined(CONFIG_SH_FPU) |
39 | .long do_fpu_error /* 120 */ | 39 | .long fpu_error_trap_handler /* 120 */ |
40 | #else | 40 | #else |
41 | .long exception_error /* 120 */ | 41 | .long exception_error /* 120 */ |
42 | #endif | 42 | #endif |
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index c5a4fc77fa06..e624180b4467 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c | |||
@@ -82,8 +82,8 @@ save_fpu(struct task_struct *tsk, struct pt_regs *regs) | |||
82 | "r" (FPSCR_INIT) | 82 | "r" (FPSCR_INIT) |
83 | : "memory"); | 83 | : "memory"); |
84 | 84 | ||
85 | disable_fpu(); | 85 | disable_fpu(); |
86 | release_fpu(regs); | 86 | release_fpu(regs); |
87 | } | 87 | } |
88 | 88 | ||
89 | static void | 89 | static void |
@@ -91,7 +91,7 @@ restore_fpu(struct task_struct *tsk) | |||
91 | { | 91 | { |
92 | unsigned long dummy; | 92 | unsigned long dummy; |
93 | 93 | ||
94 | enable_fpu(); | 94 | enable_fpu(); |
95 | asm volatile("lds %2, fpscr\n\t" | 95 | asm volatile("lds %2, fpscr\n\t" |
96 | "fmov.s @%0+, fr0\n\t" | 96 | "fmov.s @%0+, fr0\n\t" |
97 | "fmov.s @%0+, fr1\n\t" | 97 | "fmov.s @%0+, fr1\n\t" |
@@ -138,7 +138,7 @@ restore_fpu(struct task_struct *tsk) | |||
138 | /* | 138 | /* |
139 | * Load the FPU with signalling NANS. This bit pattern we're using | 139 | * Load the FPU with signalling NANS. This bit pattern we're using |
140 | * has the property that no matter whether considered as single or as | 140 | * has the property that no matter whether considered as single or as |
141 | * double precision represents signaling NANS. | 141 | * double precision represents signaling NANS. |
142 | */ | 142 | */ |
143 | 143 | ||
144 | static void | 144 | static void |
@@ -184,7 +184,7 @@ fpu_init(void) | |||
184 | "lds %2, fpscr\n\t" | 184 | "lds %2, fpscr\n\t" |
185 | : /* no output */ | 185 | : /* no output */ |
186 | : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); | 186 | : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); |
187 | disable_fpu(); | 187 | disable_fpu(); |
188 | } | 188 | } |
189 | 189 | ||
190 | /** | 190 | /** |
@@ -238,7 +238,6 @@ ieee_fpe_handler (struct pt_regs *regs) | |||
238 | if (nib[0] == 0xb || | 238 | if (nib[0] == 0xb || |
239 | (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ | 239 | (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ |
240 | regs->pr = regs->pc + 4; | 240 | regs->pr = regs->pc + 4; |
241 | |||
242 | if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ | 241 | if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ |
243 | nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); | 242 | nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); |
244 | finsn = *(unsigned short *) (regs->pc + 2); | 243 | finsn = *(unsigned short *) (regs->pc + 2); |
@@ -293,12 +292,10 @@ ieee_fpe_handler (struct pt_regs *regs) | |||
293 | return 0; | 292 | return 0; |
294 | } | 293 | } |
295 | 294 | ||
296 | asmlinkage void | 295 | BUILD_TRAP_HANDLER(fpu_error) |
297 | do_fpu_error(unsigned long r4, unsigned long r5, unsigned long r6, | ||
298 | unsigned long r7, struct pt_regs __regs) | ||
299 | { | 296 | { |
300 | struct pt_regs *regs = RELOC_HIDE(&__regs, 0); | ||
301 | struct task_struct *tsk = current; | 297 | struct task_struct *tsk = current; |
298 | TRAP_HANDLER_DECL; | ||
302 | 299 | ||
303 | if (ieee_fpe_handler(regs)) | 300 | if (ieee_fpe_handler(regs)) |
304 | return; | 301 | return; |
@@ -308,12 +305,10 @@ do_fpu_error(unsigned long r4, unsigned long r5, unsigned long r6, | |||
308 | force_sig(SIGFPE, tsk); | 305 | force_sig(SIGFPE, tsk); |
309 | } | 306 | } |
310 | 307 | ||
311 | asmlinkage void | 308 | BUILD_TRAP_HANDLER(fpu_state_restore) |
312 | do_fpu_state_restore(unsigned long r4, unsigned long r5, unsigned long r6, | ||
313 | unsigned long r7, struct pt_regs __regs) | ||
314 | { | 309 | { |
315 | struct pt_regs *regs = RELOC_HIDE(&__regs, 0); | ||
316 | struct task_struct *tsk = current; | 310 | struct task_struct *tsk = current; |
311 | TRAP_HANDLER_DECL; | ||
317 | 312 | ||
318 | grab_fpu(regs); | 313 | grab_fpu(regs); |
319 | if (!user_mode(regs)) { | 314 | if (!user_mode(regs)) { |
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 0d05fb3c48e3..2e58f7a6b746 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c | |||
@@ -662,11 +662,6 @@ asmlinkage void do_divide_error(unsigned long r4, unsigned long r5, | |||
662 | } | 662 | } |
663 | #endif | 663 | #endif |
664 | 664 | ||
665 | /* arch/sh/kernel/cpu/sh4/fpu.c */ | ||
666 | extern int do_fpu_inst(unsigned short, struct pt_regs *); | ||
667 | extern asmlinkage void do_fpu_state_restore(unsigned long r4, unsigned long r5, | ||
668 | unsigned long r6, unsigned long r7, struct pt_regs __regs); | ||
669 | |||
670 | asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5, | 665 | asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5, |
671 | unsigned long r6, unsigned long r7, | 666 | unsigned long r6, unsigned long r7, |
672 | struct pt_regs __regs) | 667 | struct pt_regs __regs) |
@@ -853,11 +848,11 @@ void __init trap_init(void) | |||
853 | set_exception_table_evt(0x820, do_illegal_slot_inst); | 848 | set_exception_table_evt(0x820, do_illegal_slot_inst); |
854 | #elif defined(CONFIG_SH_FPU) | 849 | #elif defined(CONFIG_SH_FPU) |
855 | #ifdef CONFIG_CPU_SUBTYPE_SHX3 | 850 | #ifdef CONFIG_CPU_SUBTYPE_SHX3 |
856 | set_exception_table_evt(0xd80, do_fpu_state_restore); | 851 | set_exception_table_evt(0xd80, fpu_state_restore_trap_handler); |
857 | set_exception_table_evt(0xda0, do_fpu_state_restore); | 852 | set_exception_table_evt(0xda0, fpu_state_restore_trap_handler); |
858 | #else | 853 | #else |
859 | set_exception_table_evt(0x800, do_fpu_state_restore); | 854 | set_exception_table_evt(0x800, fpu_state_restore_trap_handler); |
860 | set_exception_table_evt(0x820, do_fpu_state_restore); | 855 | set_exception_table_evt(0x820, fpu_state_restore_trap_handler); |
861 | #endif | 856 | #endif |
862 | #endif | 857 | #endif |
863 | 858 | ||